Message ID | 20210601075354.5149-2-rppt@kernel.org (mailing list archive) |
---|---|
State | Deferred, archived |
Headers | show |
Series | x86/setup: always resrve the first 1M of RAM | expand |
On 06/01/21 at 10:53am, Mike Rapoport wrote: > From: Mike Rapoport <rppt@linux.ibm.com> ...... > diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c > index 7850111008a8..b15ebfe40a73 100644 > --- a/arch/x86/platform/efi/quirks.c > +++ b/arch/x86/platform/efi/quirks.c > @@ -450,6 +450,18 @@ void __init efi_free_boot_services(void) > size -= rm_size; > } Thanks for taking care of the low-1M excluding in efi_free_boot_services(), Mike. You might want to remove the old real mode excluding code either since it's been covered by your new code. diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index b15ebfe40a73..be814f2089ff 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -409,7 +409,6 @@ void __init efi_free_boot_services(void) for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; - size_t rm_size; if (md->type != EFI_BOOT_SERVICES_CODE && md->type != EFI_BOOT_SERVICES_DATA) { @@ -430,26 +429,6 @@ void __init efi_free_boot_services(void) */ efi_unmap_pages(md); - /* - * Nasty quirk: if all sub-1MB memory is used for boot - * services, we can get here without having allocated the - * real mode trampoline. It's too late to hand boot services - * memory back to the memblock allocator, so instead - * try to manually allocate the trampoline if needed. - * - * I've seen this on a Dell XPS 13 9350 with firmware - * 1.4.4 with SGX enabled booting Linux via Fedora 24's - * grub2-efi on a hard disk. (And no, I don't know why - * this happened, but Linux should still try to boot rather - * panicking early.) - */ - rm_size = real_mode_size_needed(); - if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) { - set_real_mode_mem(start); - start += rm_size; - size -= rm_size; - } - /* * Don't free memory under 1M for two reasons: * - BIOS might clobber it > > + /* > + * Don't free memory under 1M for two reasons: > + * - BIOS might clobber it > + * - Crash kernel needs it to be reserved > + */ > + if (start + size < SZ_1M) > + continue; > + if (start < SZ_1M) { > + size -= (SZ_1M - start); > + start = SZ_1M; > + } > + > memblock_free_late(start, size); > } > > diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c > index 2e1c1bec0f9e..8ea285aca827 100644 > --- a/arch/x86/realmode/init.c > +++ b/arch/x86/realmode/init.c > @@ -29,14 +29,16 @@ void __init reserve_real_mode(void) > > /* Has to be under 1M so we can execute real-mode AP code. */ > mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); > - if (!mem) { > + if (!mem) > pr_info("No sub-1M memory is available for the trampoline\n"); > - return; > - } > + else > + set_real_mode_mem(mem); > > - memblock_reserve(mem, size); > - set_real_mode_mem(mem); > - crash_reserve_low_1M(); > + /* > + * Unconditionally reserve the entire fisrt 1M, see comment in > + * setup_arch() > + */ > + memblock_reserve(0, SZ_1M); > } > > static void sme_sev_setup_real_mode(struct trampoline_header *th) > -- > 2.28.0 >
Hi Baoquan, On Tue, Jun 01, 2021 at 05:06:53PM +0800, Baoquan He wrote: > On 06/01/21 at 10:53am, Mike Rapoport wrote: > > From: Mike Rapoport <rppt@linux.ibm.com> > ...... > > > diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c > > index 7850111008a8..b15ebfe40a73 100644 > > --- a/arch/x86/platform/efi/quirks.c > > +++ b/arch/x86/platform/efi/quirks.c > > @@ -450,6 +450,18 @@ void __init efi_free_boot_services(void) > > size -= rm_size; > > } > > Thanks for taking care of the low-1M excluding in > efi_free_boot_services(), Mike. You might want to remove the old real > mode excluding code either since it's been covered by your new code. Unfortunately I can't because it's important that set_real_mode_mem() would reuse memory that was occupied by EFI boot services and that is being freed here. According to the changelog of 5bc653b73182 ("x86/efi: Allocate a trampoline if needed in efi_free_boot_services()"), that system has EBDA at 0x2c000 so we reserve everything from 0x2c000 to 0xa0000 in reserve_bios_regions() and most of the memory below 0x2c0000 is used by EFI boot data. So with such memory layout reserve_real_mode() won't be able to allocate the trampoline. Yet, when the EFI boot data is free, the room occupied by it will be reused by the real mode trampoline via set_real_mode_mem(). > diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c > index b15ebfe40a73..be814f2089ff 100644 > --- a/arch/x86/platform/efi/quirks.c > +++ b/arch/x86/platform/efi/quirks.c > @@ -409,7 +409,6 @@ void __init efi_free_boot_services(void) > for_each_efi_memory_desc(md) { > unsigned long long start = md->phys_addr; > unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; > - size_t rm_size; > > if (md->type != EFI_BOOT_SERVICES_CODE && > md->type != EFI_BOOT_SERVICES_DATA) { > @@ -430,26 +429,6 @@ void __init efi_free_boot_services(void) > */ > efi_unmap_pages(md); > > - /* > - * Nasty quirk: if all sub-1MB memory is used for boot > - * services, we can get here without having allocated the > - * real mode trampoline. It's too late to hand boot services > - * memory back to the memblock allocator, so instead > - * try to manually allocate the trampoline if needed. > - * > - * I've seen this on a Dell XPS 13 9350 with firmware > - * 1.4.4 with SGX enabled booting Linux via Fedora 24's > - * grub2-efi on a hard disk. (And no, I don't know why > - * this happened, but Linux should still try to boot rather > - * panicking early.) > - */ > - rm_size = real_mode_size_needed(); > - if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) { > - set_real_mode_mem(start); > - start += rm_size; > - size -= rm_size; > - } > - > /* > * Don't free memory under 1M for two reasons: > * - BIOS might clobber it > > > > > + /* > > + * Don't free memory under 1M for two reasons: > > + * - BIOS might clobber it > > + * - Crash kernel needs it to be reserved > > + */ > > + if (start + size < SZ_1M) > > + continue; > > + if (start < SZ_1M) { > > + size -= (SZ_1M - start); > > + start = SZ_1M; > > + } > > + > > memblock_free_late(start, size); > > } > > > > diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c > > index 2e1c1bec0f9e..8ea285aca827 100644 > > --- a/arch/x86/realmode/init.c > > +++ b/arch/x86/realmode/init.c > > @@ -29,14 +29,16 @@ void __init reserve_real_mode(void) > > > > /* Has to be under 1M so we can execute real-mode AP code. */ > > mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); > > - if (!mem) { > > + if (!mem) > > pr_info("No sub-1M memory is available for the trampoline\n"); > > - return; > > - } > > + else > > + set_real_mode_mem(mem); > > > > - memblock_reserve(mem, size); > > - set_real_mode_mem(mem); > > - crash_reserve_low_1M(); > > + /* > > + * Unconditionally reserve the entire fisrt 1M, see comment in > > + * setup_arch() > > + */ > > + memblock_reserve(0, SZ_1M); > > } > > > > static void sme_sev_setup_real_mode(struct trampoline_header *th) > > -- > > 2.28.0 > > >
On Tue, Jun 01, 2021 at 10:53:52AM +0300, Mike Rapoport wrote: > From: Mike Rapoport <rppt@linux.ibm.com> > > There are BIOSes that are known to corrupt the memory under 1M, or more > precisely under 640K because the memory above 640K is anyway reserved for > the EGA/VGA frame buffer and BIOS. > > To prevent usage of the memory that will be potentially clobbered by the > kernel, the beginning of the memory is always reserved. The exact size of > the reserved area is determined by CONFIG_X86_RESERVE_LOW build time and > reservelow command line option. The reserved range may be from 4K to 640K > with the default of 64K. There are also configurations that reserve the > entire 1M range, like machines with SandyBridge graphic devices or systems > that enable crash kernel. > > In addition to the potentially clobbered memory, EBDA of unknown size may > be as low as 128K and the memory above that EBDA start is also reserved > early. > > It would have been possible to reserve the entire range under 1M unless for > the real mode trampoline that must reside in that area. > > To accommodate placement of the real mode trampoline and keep the memory > safe from being clobbered by BIOS reserve the first 64K of RAM before > memory allocations are possible and then, after the real mode trampoline is > allocated, reserve the entire range from 0 to 1M. > > Update trim_snb_memory() and reserve_real_mode() to avoid redundant > reservations of the same memory range. > > Also make sure the memory under 1M is not getting freed by > efi_free_boot_services(). > > Fixes: a799c2bd29d1 ("x86/setup: Consolidate early memory reservations") > Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> > --- > arch/x86/kernel/setup.c | 35 ++++++++++++++++++++-------------- > arch/x86/platform/efi/quirks.c | 12 ++++++++++++ > arch/x86/realmode/init.c | 14 ++++++++------ > 3 files changed, 41 insertions(+), 20 deletions(-) Ok, let's try it. Booting on a couple of boxes looks ok here, the difference is visible: - DMA zone: 30 pages reserved + DMA zone: 159 pages reserved On the other box, it was already reserving so many pages even before DMA zone: 159 pages reserved i.e., the first 640K. But it's not like I had problems before with early reservations so my testing doesn't mean a whole lot. Hugh's testing sounds good, lemme add his tag too. Thx.
On 6/1/21 12:53 AM, Mike Rapoport wrote: > There are BIOSes that are known to corrupt the memory under 1M, or more > precisely under 640K because the memory above 640K is anyway reserved for > the EGA/VGA frame buffer and BIOS. Should there have been a Cc: stable@ on this? Seems like the kind of thing we'd want backported.
On Thu, Jul 01, 2021 at 10:15:29AM -0700, Dave Hansen wrote: > On 6/1/21 12:53 AM, Mike Rapoport wrote: > > There are BIOSes that are known to corrupt the memory under 1M, or more > > precisely under 640K because the memory above 640K is anyway reserved for > > the EGA/VGA frame buffer and BIOS. > > Should there have been a Cc: stable@ on this? > > Seems like the kind of thing we'd want backported. The commit this patch is fixing (a799c2bd29d1) went to v5.13-rc1, so there is no need to backport it.
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 72920af0b3c0..22e9a17d6ac3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -637,11 +637,11 @@ static void __init trim_snb_memory(void) * them from accessing certain memory ranges, namely anything below * 1M and in the pages listed in bad_pages[] above. * - * To avoid these pages being ever accessed by SNB gfx devices - * reserve all memory below the 1 MB mark and bad_pages that have - * not already been reserved at boot time. + * To avoid these pages being ever accessed by SNB gfx devices reserve + * bad_pages that have not already been reserved at boot time. + * All memory below the 1 MB mark is anyway reserved later during + * setup_arch(), so there is no need to reserve it here. */ - memblock_reserve(0, 1<<20); for (i = 0; i < ARRAY_SIZE(bad_pages); i++) { if (memblock_reserve(bad_pages[i], PAGE_SIZE)) @@ -733,14 +733,14 @@ static void __init early_reserve_memory(void) * The first 4Kb of memory is a BIOS owned area, but generally it is * not listed as such in the E820 table. * - * Reserve the first memory page and typically some additional - * memory (64KiB by default) since some BIOSes are known to corrupt - * low memory. See the Kconfig help text for X86_RESERVE_LOW. + * Reserve the first 64K of memory since some BIOSes are known to + * corrupt low memory. After the real mode trampoline is allocated the + * rest of the memory below 640k is reserved. * * In addition, make sure page 0 is always reserved because on * systems with L1TF its contents can be leaked to user processes. */ - memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE)); + memblock_reserve(0, SZ_64K); early_reserve_initrd(); @@ -751,6 +751,7 @@ static void __init early_reserve_memory(void) reserve_ibft_region(); reserve_bios_regions(); + trim_snb_memory(); } /* @@ -1081,14 +1082,20 @@ void __init setup_arch(char **cmdline_p) (max_pfn_mapped<<PAGE_SHIFT) - 1); #endif - reserve_real_mode(); - /* - * Reserving memory causing GPU hangs on Sandy Bridge integrated - * graphics devices should be done after we allocated memory under - * 1M for the real mode trampoline. + * Find free memory for the real mode trampoline and place it + * there. + * If there is not enough free memory under 1M, on EFI-enabled + * systems there will be additional attempt to reclaim the memory + * for the real mode trampoline at efi_free_boot_services(). + * + * Unconditionally reserve the entire first 1M of RAM because + * BIOSes are know to corrupt low memory and several + * hundred kilobytes are not worth complex detection what memory gets + * clobbered. Moreover, on machines with SandyBridge graphics or in + * setups that use crashkernel the entire 1M is anyway reserved. */ - trim_snb_memory(); + reserve_real_mode(); init_mem_mapping(); diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 7850111008a8..b15ebfe40a73 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -450,6 +450,18 @@ void __init efi_free_boot_services(void) size -= rm_size; } + /* + * Don't free memory under 1M for two reasons: + * - BIOS might clobber it + * - Crash kernel needs it to be reserved + */ + if (start + size < SZ_1M) + continue; + if (start < SZ_1M) { + size -= (SZ_1M - start); + start = SZ_1M; + } + memblock_free_late(start, size); } diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 2e1c1bec0f9e..8ea285aca827 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -29,14 +29,16 @@ void __init reserve_real_mode(void) /* Has to be under 1M so we can execute real-mode AP code. */ mem = memblock_find_in_range(0, 1<<20, size, PAGE_SIZE); - if (!mem) { + if (!mem) pr_info("No sub-1M memory is available for the trampoline\n"); - return; - } + else + set_real_mode_mem(mem); - memblock_reserve(mem, size); - set_real_mode_mem(mem); - crash_reserve_low_1M(); + /* + * Unconditionally reserve the entire fisrt 1M, see comment in + * setup_arch() + */ + memblock_reserve(0, SZ_1M); } static void sme_sev_setup_real_mode(struct trampoline_header *th)