diff mbox series

[PATCHv5,06/12] x86/boot/compressed: Handle unaccepted memory

Message ID 20220425033934.68551-7-kirill.shutemov@linux.intel.com (mailing list archive)
State New
Headers show
Series mm, x86/cc: Implement support for unaccepted memory | expand

Commit Message

kirill.shutemov@linux.intel.com April 25, 2022, 3:39 a.m. UTC
The firmware will pre-accept the memory used to run the stub. But, the
stub is responsible for accepting the memory into which it decompresses
the main kernel. Accept memory just before decompression starts.

The stub is also responsible for choosing a physical address in which to
place the decompressed kernel image. The KASLR mechanism will randomize
this physical address. Since the unaccepted memory region is relatively
small, KASLR would be quite ineffective if it only used the pre-accepted
area (EFI_CONVENTIONAL_MEMORY). Ensure that KASLR randomizes among the
entire physical address space by also including EFI_UNACCEPTED_MEMOR

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/boot/compressed/Makefile        |  2 +-
 arch/x86/boot/compressed/kaslr.c         | 14 ++++++++++++--
 arch/x86/boot/compressed/mem.c           | 21 +++++++++++++++++++++
 arch/x86/boot/compressed/misc.c          |  9 +++++++++
 arch/x86/include/asm/unaccepted_memory.h |  2 ++
 5 files changed, 45 insertions(+), 3 deletions(-)

Comments

Michael Roth April 27, 2022, 12:17 a.m. UTC | #1
On Mon, Apr 25, 2022 at 06:39:28AM +0300, Kirill A. Shutemov wrote:
> The firmware will pre-accept the memory used to run the stub. But, the
> stub is responsible for accepting the memory into which it decompresses
> the main kernel. Accept memory just before decompression starts.
> 
> The stub is also responsible for choosing a physical address in which to
> place the decompressed kernel image. The KASLR mechanism will randomize
> this physical address. Since the unaccepted memory region is relatively
> small, KASLR would be quite ineffective if it only used the pre-accepted
> area (EFI_CONVENTIONAL_MEMORY). Ensure that KASLR randomizes among the
> entire physical address space by also including EFI_UNACCEPTED_MEMOR
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  arch/x86/boot/compressed/Makefile        |  2 +-
>  arch/x86/boot/compressed/kaslr.c         | 14 ++++++++++++--
>  arch/x86/boot/compressed/mem.c           | 21 +++++++++++++++++++++
>  arch/x86/boot/compressed/misc.c          |  9 +++++++++
>  arch/x86/include/asm/unaccepted_memory.h |  2 ++
>  5 files changed, 45 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
> index 7f672f7e2fea..b59007e57cbf 100644
> --- a/arch/x86/boot/compressed/Makefile
> +++ b/arch/x86/boot/compressed/Makefile
> @@ -102,7 +102,7 @@ endif
>  
>  vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
>  vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o
> -vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/bitmap.o $(obj)/mem.o
> +vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/bitmap.o $(obj)/find.o $(obj)/mem.o

Since it's possible to have CONFIG_UNACCEPTED_MEMORY=y while
CONFIG_INTEL_TDX_GUEST=n (e.g. for SNP-only guest kernels), this can
result in mem.o reporting linker errors due to tdx_accept_memory() not
being defined. I think it needs a stub for !CONFIG_INTEL_TDX_GUEST, or
something along that line.

>  
>  vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
>  efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> index 411b268bc0a2..59db90626042 100644
> --- a/arch/x86/boot/compressed/kaslr.c
> +++ b/arch/x86/boot/compressed/kaslr.c
> @@ -725,10 +725,20 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
>  		 * but in practice there's firmware where using that memory leads
>  		 * to crashes.
>  		 *
> -		 * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
> +		 * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if
> +		 * supported) are guaranteed to be free.
>  		 */
> -		if (md->type != EFI_CONVENTIONAL_MEMORY)
> +
> +		switch (md->type) {
> +		case EFI_CONVENTIONAL_MEMORY:
> +			break;
> +		case EFI_UNACCEPTED_MEMORY:

Just FYI, but with latest tip boot/compressed now relies on a separate header
in arch/x86/boot/compressed/efi.h where this need to be defined again.
Kirill A . Shutemov April 27, 2022, 2:19 p.m. UTC | #2
On Tue, Apr 26, 2022 at 07:17:56PM -0500, Michael Roth wrote:
> On Mon, Apr 25, 2022 at 06:39:28AM +0300, Kirill A. Shutemov wrote:
> > The firmware will pre-accept the memory used to run the stub. But, the
> > stub is responsible for accepting the memory into which it decompresses
> > the main kernel. Accept memory just before decompression starts.
> > 
> > The stub is also responsible for choosing a physical address in which to
> > place the decompressed kernel image. The KASLR mechanism will randomize
> > this physical address. Since the unaccepted memory region is relatively
> > small, KASLR would be quite ineffective if it only used the pre-accepted
> > area (EFI_CONVENTIONAL_MEMORY). Ensure that KASLR randomizes among the
> > entire physical address space by also including EFI_UNACCEPTED_MEMOR
> > 
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > ---
> >  arch/x86/boot/compressed/Makefile        |  2 +-
> >  arch/x86/boot/compressed/kaslr.c         | 14 ++++++++++++--
> >  arch/x86/boot/compressed/mem.c           | 21 +++++++++++++++++++++
> >  arch/x86/boot/compressed/misc.c          |  9 +++++++++
> >  arch/x86/include/asm/unaccepted_memory.h |  2 ++
> >  5 files changed, 45 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
> > index 7f672f7e2fea..b59007e57cbf 100644
> > --- a/arch/x86/boot/compressed/Makefile
> > +++ b/arch/x86/boot/compressed/Makefile
> > @@ -102,7 +102,7 @@ endif
> >  
> >  vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
> >  vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o
> > -vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/bitmap.o $(obj)/mem.o
> > +vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/bitmap.o $(obj)/find.o $(obj)/mem.o
> 
> Since it's possible to have CONFIG_UNACCEPTED_MEMORY=y while
> CONFIG_INTEL_TDX_GUEST=n (e.g. for SNP-only guest kernels), this can
> result in mem.o reporting linker errors due to tdx_accept_memory() not
> being defined. I think it needs a stub for !CONFIG_INTEL_TDX_GUEST, or
> something along that line.

Fair enough. This would do the trick:

diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
index 539fff27de49..4a49a2438180 100644
--- a/arch/x86/boot/compressed/mem.c
+++ b/arch/x86/boot/compressed/mem.c
@@ -19,6 +19,9 @@ static bool is_tdx_guest(void)
 	static bool once;
 	static bool is_tdx;

+	if (!IS_ENABLED(CONFIG_INTEL_TDX_GUEST))
+	    return false;
+
 	if (!once) {
 		u32 eax, sig[3];

> >  vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
> >  efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
> > diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> > index 411b268bc0a2..59db90626042 100644
> > --- a/arch/x86/boot/compressed/kaslr.c
> > +++ b/arch/x86/boot/compressed/kaslr.c
> > @@ -725,10 +725,20 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
> >  		 * but in practice there's firmware where using that memory leads
> >  		 * to crashes.
> >  		 *
> > -		 * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
> > +		 * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if
> > +		 * supported) are guaranteed to be free.
> >  		 */
> > -		if (md->type != EFI_CONVENTIONAL_MEMORY)
> > +
> > +		switch (md->type) {
> > +		case EFI_CONVENTIONAL_MEMORY:
> > +			break;
> > +		case EFI_UNACCEPTED_MEMORY:
> 
> Just FYI, but with latest tip boot/compressed now relies on a separate header
> in arch/x86/boot/compressed/efi.h where this need to be defined again.

Right.

Borislav, how do you want to handle this? Do you want me to rebase the
tree to a specific branch?
Wander Lairson Costa April 29, 2022, 1:10 p.m. UTC | #3
On Mon, Apr 25, 2022 at 06:39:28AM +0300, Kirill A. Shutemov wrote:
> The firmware will pre-accept the memory used to run the stub. But, the
> stub is responsible for accepting the memory into which it decompresses
> the main kernel. Accept memory just before decompression starts.
> 
> The stub is also responsible for choosing a physical address in which to
> place the decompressed kernel image. The KASLR mechanism will randomize
> this physical address. Since the unaccepted memory region is relatively
> small, KASLR would be quite ineffective if it only used the pre-accepted
> area (EFI_CONVENTIONAL_MEMORY). Ensure that KASLR randomizes among the
> entire physical address space by also including EFI_UNACCEPTED_MEMOR

nit: s/EFI_UNACCEPTED_MEMOR/EFI_UNACCEPTED_MEMORY./

[snip]
Borislav Petkov May 3, 2022, 2:12 p.m. UTC | #4
On Mon, Apr 25, 2022 at 06:39:28AM +0300, Kirill A. Shutemov wrote:
> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> index 411b268bc0a2..59db90626042 100644
> --- a/arch/x86/boot/compressed/kaslr.c
> +++ b/arch/x86/boot/compressed/kaslr.c
> @@ -725,10 +725,20 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
>  		 * but in practice there's firmware where using that memory leads
>  		 * to crashes.
>  		 *
> -		 * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
> +		 * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if
> +		 * supported) are guaranteed to be free.
>  		 */
> -		if (md->type != EFI_CONVENTIONAL_MEMORY)
> +
> +		switch (md->type) {
> +		case EFI_CONVENTIONAL_MEMORY:
> +			break;
> +		case EFI_UNACCEPTED_MEMORY:
> +			if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
> +				break;
>  			continue;
> +		default:
> +			continue;
> +		}

Is there any special reason for this to be a switch-case or can it
simply be a compound conditional if (bla...) ?

> @@ -66,3 +69,21 @@ void process_unaccepted_memory(struct boot_params *params, u64 start, u64 end)
>  	bitmap_set((unsigned long *)params->unaccepted_memory,
>  		   start / PMD_SIZE, (end - start) / PMD_SIZE);
>  }
> +
> +void accept_memory(phys_addr_t start, phys_addr_t end)
> +{
> +	unsigned long range_start, range_end;
> +	unsigned long *unaccepted_memory;

Please shorten that name so that you don't have to break the lines below.

> +	unsigned long bitmap_size;
> +
> +	unaccepted_memory = (unsigned long *)boot_params->unaccepted_memory;
> +	range_start = start / PMD_SIZE;
> +	bitmap_size = DIV_ROUND_UP(end, PMD_SIZE);
> +
> +	for_each_set_bitrange_from(range_start, range_end,
> +				   unaccepted_memory, bitmap_size) {
> +		__accept_memory(range_start * PMD_SIZE, range_end * PMD_SIZE);
> +		bitmap_clear(unaccepted_memory,
> +			     range_start, range_end - range_start);
> +	}
> +}
> diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
> index fa8969fad011..285b37e28074 100644
> --- a/arch/x86/boot/compressed/misc.c
> +++ b/arch/x86/boot/compressed/misc.c
> @@ -18,6 +18,7 @@
>  #include "../string.h"
>  #include "../voffset.h"
>  #include <asm/bootparam_utils.h>
> +#include <asm/unaccepted_memory.h>
>  
>  /*
>   * WARNING!!
> @@ -451,6 +452,14 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
>  #endif
>  
>  	debug_putstr("\nDecompressing Linux... ");
> +
> +#ifdef CONFIG_UNACCEPTED_MEMORY

IS_ENABLED() perhaps?

> +	if (boot_params->unaccepted_memory) {

Also, that ->unaccepted_memory will be set only when
ACONFIG_UNACCEPTED_MEMORY is set, FAICT.
Borislav Petkov May 3, 2022, 2:15 p.m. UTC | #5
On Wed, Apr 27, 2022 at 05:19:14PM +0300, Kirill A. Shutemov wrote:
> Borislav, how do you want to handle this? Do you want me to rebase the
> tree to a specific branch?

All patchsets for tip should usually be based ontop of current
tip/master.

The compressed/efi.h change is ontop of tip:x86/sev so we might have to
do some patch tetris...
Kirill A . Shutemov May 6, 2022, 3:30 p.m. UTC | #6
On Tue, May 03, 2022 at 04:12:55PM +0200, Borislav Petkov wrote:
> On Mon, Apr 25, 2022 at 06:39:28AM +0300, Kirill A. Shutemov wrote:
> > diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> > index 411b268bc0a2..59db90626042 100644
> > --- a/arch/x86/boot/compressed/kaslr.c
> > +++ b/arch/x86/boot/compressed/kaslr.c
> > @@ -725,10 +725,20 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
> >  		 * but in practice there's firmware where using that memory leads
> >  		 * to crashes.
> >  		 *
> > -		 * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
> > +		 * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if
> > +		 * supported) are guaranteed to be free.
> >  		 */
> > -		if (md->type != EFI_CONVENTIONAL_MEMORY)
> > +
> > +		switch (md->type) {
> > +		case EFI_CONVENTIONAL_MEMORY:
> > +			break;
> > +		case EFI_UNACCEPTED_MEMORY:
> > +			if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
> > +				break;
> >  			continue;
> > +		default:
> > +			continue;
> > +		}
> 
> Is there any special reason for this to be a switch-case or can it
> simply be a compound conditional if (bla...) ?

The equivalent 'if' statement is something like:

		if (md->type != EFI_CONVENTIONAL_MEMORY &&
		    !(md->type == EFI_UNACCEPTED_MEMORY && IS_ENABLED(CONFIG_UNACCEPTED_MEMORY)))
		    continue;

I find it harder to follow.

Do you want me to change to the 'if' anyway?
Borislav Petkov May 10, 2022, 11:03 a.m. UTC | #7
On Fri, May 06, 2022 at 06:30:13PM +0300, Kirill A. Shutemov wrote:
> I find it harder to follow.

If in doubt, always consider using a helper function:

---

diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h
index 7db2f41b54cd..cf475243b6d5 100644
--- a/arch/x86/boot/compressed/efi.h
+++ b/arch/x86/boot/compressed/efi.h
@@ -32,6 +32,7 @@ typedef	struct {
 } efi_table_hdr_t;
 
 #define EFI_CONVENTIONAL_MEMORY		 7
+#define EFI_UNACCEPTED_MEMORY		15
 
 #define EFI_MEMORY_MORE_RELIABLE \
 				((u64)0x0000000000010000ULL)	/* higher reliability */
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 28b91df9d31e..39bb4c319dfc 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -671,6 +671,23 @@ static bool process_mem_region(struct mem_vector *region,
 }
 
 #ifdef CONFIG_EFI
+
+/*
+ * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are guaranteed
+ * to be free.
+ */
+static inline bool memory_type_is_free(efi_memory_desc_t *md)
+{
+	if (md->type == EFI_CONVENTIONAL_MEMORY)
+		return true;
+
+	if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
+		if (md->type == EFI_UNACCEPTED_MEMORY)
+			return true;
+
+	return false;
+}
+
 /*
  * Returns true if we processed the EFI memmap, which we prefer over the E820
  * table if it is available.
@@ -723,21 +740,9 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
 		 * free memory and thus available to place the kernel image into,
 		 * but in practice there's firmware where using that memory leads
 		 * to crashes.
-		 *
-		 * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if
-		 * supported) are guaranteed to be free.
 		 */
-
-		switch (md->type) {
-		case EFI_CONVENTIONAL_MEMORY:
-			break;
-		case EFI_UNACCEPTED_MEMORY:
-			if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
-				break;
+		if (!memory_type_is_free(md))
 			continue;
-		default:
-			continue;
-		}
 
 		if (efi_soft_reserve_enabled() &&
 		    (md->attribute & EFI_MEMORY_SP))
Dionna Amalie Glaze May 13, 2022, 5:31 a.m. UTC | #8
Kirill, I've been tracking these changes to see if we can handle the
unaccepted memory type for SEV-SNP, but testing has been an issue. The
proposed patch in Ovmf to introduce unaccepted memory seems to have stalled
out last September (
https://www.mail-archive.com/devel@edk2.groups.io/msg35842.html) and is
particularly difficult to adapt to SEV-SNP since it doesn't follow the TDVF
way of initializing all memory. Is there a different development I might
have missed so that we might test these cases? Without the UEFI introducing
EFI_UNACCEPTED_MEMORY type, any kernel uses are essentially dead code.

Thanks,
-Dionna

On Tue, May 10, 2022 at 4:04 AM Borislav Petkov <bp@alien8.de> wrote:

> On Fri, May 06, 2022 at 06:30:13PM +0300, Kirill A. Shutemov wrote:
> > I find it harder to follow.
>
> If in doubt, always consider using a helper function:
>
> ---
>
> diff --git a/arch/x86/boot/compressed/efi.h
> b/arch/x86/boot/compressed/efi.h
> index 7db2f41b54cd..cf475243b6d5 100644
> --- a/arch/x86/boot/compressed/efi.h
> +++ b/arch/x86/boot/compressed/efi.h
> @@ -32,6 +32,7 @@ typedef       struct {
>  } efi_table_hdr_t;
>
>  #define EFI_CONVENTIONAL_MEMORY                 7
> +#define EFI_UNACCEPTED_MEMORY          15
>
>  #define EFI_MEMORY_MORE_RELIABLE \
>                                 ((u64)0x0000000000010000ULL)    /* higher
> reliability */
> diff --git a/arch/x86/boot/compressed/kaslr.c
> b/arch/x86/boot/compressed/kaslr.c
> index 28b91df9d31e..39bb4c319dfc 100644
> --- a/arch/x86/boot/compressed/kaslr.c
> +++ b/arch/x86/boot/compressed/kaslr.c
> @@ -671,6 +671,23 @@ static bool process_mem_region(struct mem_vector
> *region,
>  }
>
>  #ifdef CONFIG_EFI
> +
> +/*
> + * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported)
> are guaranteed
> + * to be free.
> + */
> +static inline bool memory_type_is_free(efi_memory_desc_t *md)
> +{
> +       if (md->type == EFI_CONVENTIONAL_MEMORY)
> +               return true;
> +
> +       if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
> +               if (md->type == EFI_UNACCEPTED_MEMORY)
> +                       return true;
> +
> +       return false;
> +}
> +
>  /*
>   * Returns true if we processed the EFI memmap, which we prefer over the
> E820
>   * table if it is available.
> @@ -723,21 +740,9 @@ process_efi_entries(unsigned long minimum, unsigned
> long image_size)
>                  * free memory and thus available to place the kernel
> image into,
>                  * but in practice there's firmware where using that
> memory leads
>                  * to crashes.
> -                *
> -                * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY
> (if
> -                * supported) are guaranteed to be free.
>                  */
> -
> -               switch (md->type) {
> -               case EFI_CONVENTIONAL_MEMORY:
> -                       break;
> -               case EFI_UNACCEPTED_MEMORY:
> -                       if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
> -                               break;
> +               if (!memory_type_is_free(md))
>                         continue;
> -               default:
> -                       continue;
> -               }
>
>                 if (efi_soft_reserve_enabled() &&
>                     (md->attribute & EFI_MEMORY_SP))
> --
> Regards/Gruss,
>     Boris.
>
> https://people.kernel.org/tglx/notes-about-netiquette
>
Dionna Amalie Glaze May 13, 2022, 5:34 a.m. UTC | #9
Kirill, I've been tracking these changes to see if we can handle the
unaccepted memory type for SEV-SNP, but testing has been an issue. The
proposed patch in Ovmf to introduce unaccepted memory seems to have
stalled out last September
(https://www.mail-archive.com/devel@edk2.groups.io/msg35842.html) and
is particularly difficult to adapt to SEV-SNP since it doesn't follow
the TDVF way of initializing all memory. Is there a different
development I might have missed so that we might test these cases?
Without the UEFI introducing EFI_UNACCEPTED_MEMORY type, any kernel
uses are essentially dead code.

Thanks,
-Dionna

(apologies for repost in text mode)

On Tue, May 10, 2022 at 4:04 AM Borislav Petkov <bp@alien8.de> wrote:
>
> On Fri, May 06, 2022 at 06:30:13PM +0300, Kirill A. Shutemov wrote:
> > I find it harder to follow.
>
> If in doubt, always consider using a helper function:
>
> ---
>
> diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h
> index 7db2f41b54cd..cf475243b6d5 100644
> --- a/arch/x86/boot/compressed/efi.h
> +++ b/arch/x86/boot/compressed/efi.h
> @@ -32,6 +32,7 @@ typedef       struct {
>  } efi_table_hdr_t;
>
>  #define EFI_CONVENTIONAL_MEMORY                 7
> +#define EFI_UNACCEPTED_MEMORY          15
>
>  #define EFI_MEMORY_MORE_RELIABLE \
>                                 ((u64)0x0000000000010000ULL)    /* higher reliability */
> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> index 28b91df9d31e..39bb4c319dfc 100644
> --- a/arch/x86/boot/compressed/kaslr.c
> +++ b/arch/x86/boot/compressed/kaslr.c
> @@ -671,6 +671,23 @@ static bool process_mem_region(struct mem_vector *region,
>  }
>
>  #ifdef CONFIG_EFI
> +
> +/*
> + * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are guaranteed
> + * to be free.
> + */
> +static inline bool memory_type_is_free(efi_memory_desc_t *md)
> +{
> +       if (md->type == EFI_CONVENTIONAL_MEMORY)
> +               return true;
> +
> +       if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
> +               if (md->type == EFI_UNACCEPTED_MEMORY)
> +                       return true;
> +
> +       return false;
> +}
> +
>  /*
>   * Returns true if we processed the EFI memmap, which we prefer over the E820
>   * table if it is available.
> @@ -723,21 +740,9 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
>                  * free memory and thus available to place the kernel image into,
>                  * but in practice there's firmware where using that memory leads
>                  * to crashes.
> -                *
> -                * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if
> -                * supported) are guaranteed to be free.
>                  */
> -
> -               switch (md->type) {
> -               case EFI_CONVENTIONAL_MEMORY:
> -                       break;
> -               case EFI_UNACCEPTED_MEMORY:
> -                       if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
> -                               break;
> +               if (!memory_type_is_free(md))
>                         continue;
> -               default:
> -                       continue;
> -               }
>
>                 if (efi_soft_reserve_enabled() &&
>                     (md->attribute & EFI_MEMORY_SP))
> --
> Regards/Gruss,
>     Boris.
>
> https://people.kernel.org/tglx/notes-about-netiquette
Borislav Petkov May 13, 2022, 9:01 a.m. UTC | #10
+ mroth
- brijesh

On Thu, May 12, 2022 at 10:34:02PM -0700, Dionna Amalie Glaze wrote:
> Kirill, I've been tracking these changes to see if we can handle the
> unaccepted memory type for SEV-SNP, but testing has been an issue. The
> proposed patch in Ovmf to introduce unaccepted memory seems to have
> stalled out last September
> (https://www.mail-archive.com/devel@edk2.groups.io/msg35842.html) and
> is particularly difficult to adapt to SEV-SNP since it doesn't follow
> the TDVF way of initializing all memory. Is there a different
> development I might have missed so that we might test these cases?
> Without the UEFI introducing EFI_UNACCEPTED_MEMORY type, any kernel
> uses are essentially dead code.
> 
> Thanks,
> -Dionna
> 
> (apologies for repost in text mode)
> 
> On Tue, May 10, 2022 at 4:04 AM Borislav Petkov <bp@alien8.de> wrote:
> >
> > On Fri, May 06, 2022 at 06:30:13PM +0300, Kirill A. Shutemov wrote:
> > > I find it harder to follow.
> >
> > If in doubt, always consider using a helper function:
> >
> > ---
> >
> > diff --git a/arch/x86/boot/compressed/efi.h b/arch/x86/boot/compressed/efi.h
> > index 7db2f41b54cd..cf475243b6d5 100644
> > --- a/arch/x86/boot/compressed/efi.h
> > +++ b/arch/x86/boot/compressed/efi.h
> > @@ -32,6 +32,7 @@ typedef       struct {
> >  } efi_table_hdr_t;
> >
> >  #define EFI_CONVENTIONAL_MEMORY                 7
> > +#define EFI_UNACCEPTED_MEMORY          15
> >
> >  #define EFI_MEMORY_MORE_RELIABLE \
> >                                 ((u64)0x0000000000010000ULL)    /* higher reliability */
> > diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> > index 28b91df9d31e..39bb4c319dfc 100644
> > --- a/arch/x86/boot/compressed/kaslr.c
> > +++ b/arch/x86/boot/compressed/kaslr.c
> > @@ -671,6 +671,23 @@ static bool process_mem_region(struct mem_vector *region,
> >  }
> >
> >  #ifdef CONFIG_EFI
> > +
> > +/*
> > + * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if supported) are guaranteed
> > + * to be free.
> > + */
> > +static inline bool memory_type_is_free(efi_memory_desc_t *md)
> > +{
> > +       if (md->type == EFI_CONVENTIONAL_MEMORY)
> > +               return true;
> > +
> > +       if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
> > +               if (md->type == EFI_UNACCEPTED_MEMORY)
> > +                       return true;
> > +
> > +       return false;
> > +}
> > +
> >  /*
> >   * Returns true if we processed the EFI memmap, which we prefer over the E820
> >   * table if it is available.
> > @@ -723,21 +740,9 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
> >                  * free memory and thus available to place the kernel image into,
> >                  * but in practice there's firmware where using that memory leads
> >                  * to crashes.
> > -                *
> > -                * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if
> > -                * supported) are guaranteed to be free.
> >                  */
> > -
> > -               switch (md->type) {
> > -               case EFI_CONVENTIONAL_MEMORY:
> > -                       break;
> > -               case EFI_UNACCEPTED_MEMORY:
> > -                       if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
> > -                               break;
> > +               if (!memory_type_is_free(md))
> >                         continue;
> > -               default:
> > -                       continue;
> > -               }
> >
> >                 if (efi_soft_reserve_enabled() &&
> >                     (md->attribute & EFI_MEMORY_SP))
> > --
> > Regards/Gruss,
> >     Boris.
> >
> > https://people.kernel.org/tglx/notes-about-netiquette
> 
> 
> 
> -- 
> -Dionna Glaze, PhD (she/her)
>
kirill.shutemov@linux.intel.com May 13, 2022, 2:45 p.m. UTC | #11
On Fri, May 13, 2022 at 11:01:43AM +0200, Borislav Petkov wrote:
> + mroth
> - brijesh
> 
> On Thu, May 12, 2022 at 10:34:02PM -0700, Dionna Amalie Glaze wrote:
> > Kirill, I've been tracking these changes to see if we can handle the
> > unaccepted memory type for SEV-SNP, but testing has been an issue. The
> > proposed patch in Ovmf to introduce unaccepted memory seems to have
> > stalled out last September
> > (https://www.mail-archive.com/devel@edk2.groups.io/msg35842.html) and
> > is particularly difficult to adapt to SEV-SNP since it doesn't follow
> > the TDVF way of initializing all memory. Is there a different
> > development I might have missed so that we might test these cases?
> > Without the UEFI introducing EFI_UNACCEPTED_MEMORY type, any kernel
> > uses are essentially dead code.

+ Min, Jiaqi.

I don't follow firmware development. Min, Jiaqi, could you comment?
Xu, Min M May 16, 2022, 6:46 a.m. UTC | #12
On May 13, 2022 10:45 PM, Kirill A. Shutemov wrote:
> On Fri, May 13, 2022 at 11:01:43AM +0200, Borislav Petkov wrote:
> > + mroth
> > - brijesh
> >
> > On Thu, May 12, 2022 at 10:34:02PM -0700, Dionna Amalie Glaze wrote:
> > > Kirill, I've been tracking these changes to see if we can handle the
> > > unaccepted memory type for SEV-SNP, but testing has been an issue.
> > > The proposed patch in Ovmf to introduce unaccepted memory seems to
> > > have stalled out last September
> > > (https://www.mail-archive.com/devel@edk2.groups.io/msg35842.html)
> > > and is particularly difficult to adapt to SEV-SNP since it doesn't
> > > follow the TDVF way of initializing all memory. Is there a different
> > > development I might have missed so that we might test these cases?
> > > Without the UEFI introducing EFI_UNACCEPTED_MEMORY type, any
> kernel
> > > uses are essentially dead code.
> 
> + Min, Jiaqi.
> 
> I don't follow firmware development. Min, Jiaqi, could you comment?
> 
We have prepared the patch for unaccepted memory and it is now working in our internal release.
But there is an obstacle to upstream it to edk2 master branch. 
The patch-set depends on the definition of UEFI_RESOURCE_MEMORY_UNACCEPTED in PI spec. This is proposed in https://github.com/microsoft/mu_basecore/pull/66/files#diff-b20a11152d1ce9249c691be5690b4baf52069efadf2e2546cdd2eb663d80c9e4R237, according to UEFI-Code-First. The proposal was approved in 2021 in UEFI Mantis, and will be added to the new PI.next specification. (Till now it has not been added in the latest PI spec.)
So UEFI_RESOURCE_MEMORY_UNACCEPTED cannot be added in MdePkg which make it difficult to submit the patch to edk2 community for review. See this link: https://edk2.groups.io/g/devel/message/87558

Please be noted: UEFI_RESOURCE_MEMORY_UNACCEPTED (defined in PI spec) is different from EFI_UNACCEPTED_MEMORY (defined in UEFI spec)

I will submit the patch-set once the new definition is added in the new PI.next spec.

Thanks
Min
Dionna Amalie Glaze May 31, 2022, 10:37 p.m. UTC | #13
Hi y'all, I've made minimal changes to OVMF to prevalidate only up to 4GB
and leave the rest unaccepted, as Thomas Lendacky recommended
https://github.com/AMDESE/ovmf/pull/4#issuecomment-1138606275 and ran a
memtouch test to see if this change behaves as expected. One thing that
struck me is that an 8GB machine reports 2044MB free with this change (free
-k) whereas without it, I see 7089MB free. I think that unaccepted memory
should be classified as free in meminfo, no? I'm not familiar enough with
that code to say what specific change needs to be made.

On Sun, May 15, 2022 at 11:47 PM Xu, Min M <min.m.xu@intel.com> wrote:

> On May 13, 2022 10:45 PM, Kirill A. Shutemov wrote:
> > On Fri, May 13, 2022 at 11:01:43AM +0200, Borislav Petkov wrote:
> > > + mroth
> > > - brijesh
> > >
> > > On Thu, May 12, 2022 at 10:34:02PM -0700, Dionna Amalie Glaze wrote:
> > > > Kirill, I've been tracking these changes to see if we can handle the
> > > > unaccepted memory type for SEV-SNP, but testing has been an issue.
> > > > The proposed patch in Ovmf to introduce unaccepted memory seems to
> > > > have stalled out last September
> > > > (https://www.mail-archive.com/devel@edk2.groups.io/msg35842.html)
> > > > and is particularly difficult to adapt to SEV-SNP since it doesn't
> > > > follow the TDVF way of initializing all memory. Is there a different
> > > > development I might have missed so that we might test these cases?
> > > > Without the UEFI introducing EFI_UNACCEPTED_MEMORY type, any
> > kernel
> > > > uses are essentially dead code.
> >
> > + Min, Jiaqi.
> >
> > I don't follow firmware development. Min, Jiaqi, could you comment?
> >
> We have prepared the patch for unaccepted memory and it is now working in
> our internal release.
> But there is an obstacle to upstream it to edk2 master branch.
> The patch-set depends on the definition of UEFI_RESOURCE_MEMORY_UNACCEPTED
> in PI spec. This is proposed in
> https://github.com/microsoft/mu_basecore/pull/66/files#diff-b20a11152d1ce9249c691be5690b4baf52069efadf2e2546cdd2eb663d80c9e4R237,
> according to UEFI-Code-First. The proposal was approved in 2021 in UEFI
> Mantis, and will be added to the new PI.next specification. (Till now it
> has not been added in the latest PI spec.)
> So UEFI_RESOURCE_MEMORY_UNACCEPTED cannot be added in MdePkg which make it
> difficult to submit the patch to edk2 community for review. See this link:
> https://edk2.groups.io/g/devel/message/87558
>
> Please be noted: UEFI_RESOURCE_MEMORY_UNACCEPTED (defined in PI spec) is
> different from EFI_UNACCEPTED_MEMORY (defined in UEFI spec)
>
> I will submit the patch-set once the new definition is added in the new
> PI.next spec.
>
> Thanks
> Min
>
Dionna Amalie Glaze May 31, 2022, 10:40 p.m. UTC | #14
Hi y'all, I've made minimal changes to OVMF to prevalidate only up to
4GB and leave the rest unaccepted, as Thomas Lendacky recommended
https://github.com/AMDESE/ovmf/pull/4#issuecomment-1138606275 and ran
a memtouch test to see if this change behaves as expected. One thing
that struck me is that an 8GB machine reports 2044MB free with this
change (free -k) whereas without it, I see 7089MB free. I think that
unaccepted memory should be classified as free in meminfo, no? I'm not
familiar enough with that code to say what specific change needs to be
made.

(resent in text mode)


On Sun, May 15, 2022 at 11:47 PM Xu, Min M <min.m.xu@intel.com> wrote:
>
> On May 13, 2022 10:45 PM, Kirill A. Shutemov wrote:
> > On Fri, May 13, 2022 at 11:01:43AM +0200, Borislav Petkov wrote:
> > > + mroth
> > > - brijesh
> > >
> > > On Thu, May 12, 2022 at 10:34:02PM -0700, Dionna Amalie Glaze wrote:
> > > > Kirill, I've been tracking these changes to see if we can handle the
> > > > unaccepted memory type for SEV-SNP, but testing has been an issue.
> > > > The proposed patch in Ovmf to introduce unaccepted memory seems to
> > > > have stalled out last September
> > > > (https://www.mail-archive.com/devel@edk2.groups.io/msg35842.html)
> > > > and is particularly difficult to adapt to SEV-SNP since it doesn't
> > > > follow the TDVF way of initializing all memory. Is there a different
> > > > development I might have missed so that we might test these cases?
> > > > Without the UEFI introducing EFI_UNACCEPTED_MEMORY type, any
> > kernel
> > > > uses are essentially dead code.
> >
> > + Min, Jiaqi.
> >
> > I don't follow firmware development. Min, Jiaqi, could you comment?
> >
> We have prepared the patch for unaccepted memory and it is now working in our internal release.
> But there is an obstacle to upstream it to edk2 master branch.
> The patch-set depends on the definition of UEFI_RESOURCE_MEMORY_UNACCEPTED in PI spec. This is proposed in https://github.com/microsoft/mu_basecore/pull/66/files#diff-b20a11152d1ce9249c691be5690b4baf52069efadf2e2546cdd2eb663d80c9e4R237, according to UEFI-Code-First. The proposal was approved in 2021 in UEFI Mantis, and will be added to the new PI.next specification. (Till now it has not been added in the latest PI spec.)
> So UEFI_RESOURCE_MEMORY_UNACCEPTED cannot be added in MdePkg which make it difficult to submit the patch to edk2 community for review. See this link: https://edk2.groups.io/g/devel/message/87558
>
> Please be noted: UEFI_RESOURCE_MEMORY_UNACCEPTED (defined in PI spec) is different from EFI_UNACCEPTED_MEMORY (defined in UEFI spec)
>
> I will submit the patch-set once the new definition is added in the new PI.next spec.
>
> Thanks
> Min
Gupta, Pankaj June 1, 2022, 3:49 p.m. UTC | #15
> Hi y'all, I've made minimal changes to OVMF to prevalidate only up to
> 4GB and leave the rest unaccepted, as Thomas Lendacky recommended
> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FAMDESE%2Fovmf%2Fpull%2F4%23issuecomment-1138606275&amp;data=05%7C01%7Cpankaj.gupta%40amd.com%7Cde8fd09ad93f4420bd7408da43568f68%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637896336342540814%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=K93%2F1FrPOo4bIWcssHoisM8vDkOBjWh69bUWosT%2Bt0E%3D&amp;reserved=0 and ran
> a memtouch test to see if this change behaves as expected. One thing
> that struck me is that an 8GB machine reports 2044MB free with this
> change (free -k) whereas without it, I see 7089MB free. I think that
> unaccepted memory should be classified as free in meminfo, no? I'm not
> familiar enough with that code to say what specific change needs to be
> made.
> 

Is it memory accounting issue when accepting all the memory at boot time
compared to 4GB:4GB preboot_acceptance:use_time_acceptance split?

You said you ran memtouch (don't know how it works, assuming it uses 
memory)? Doesn't that mean most of the memory used and hence accepted? 
So, free memory reduced?

Just trying to understand the issue.

Thanks,
Pankaj
> 
> 
> On Sun, May 15, 2022 at 11:47 PM Xu, Min M <min.m.xu@intel.com> wrote:
>>
>> On May 13, 2022 10:45 PM, Kirill A. Shutemov wrote:
>>> On Fri, May 13, 2022 at 11:01:43AM +0200, Borislav Petkov wrote:
>>>> + mroth
>>>> - brijesh
>>>>
>>>> On Thu, May 12, 2022 at 10:34:02PM -0700, Dionna Amalie Glaze wrote:
>>>>> Kirill, I've been tracking these changes to see if we can handle the
>>>>> unaccepted memory type for SEV-SNP, but testing has been an issue.
>>>>> The proposed patch in Ovmf to introduce unaccepted memory seems to
>>>>> have stalled out last September
>>>>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fwww.mail-archive.com%2Fdevel%40edk2.groups.io%2Fmsg35842.html&amp;data=05%7C01%7Cpankaj.gupta%40amd.com%7Cde8fd09ad93f4420bd7408da43568f68%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637896336342540814%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=Hku8nQJGOg%2FdQqypHxw2eLFG0e%2FE6HoF5VXSIhMpmx0%3D&amp;reserved=0)
>>>>> and is particularly difficult to adapt to SEV-SNP since it doesn't
>>>>> follow the TDVF way of initializing all memory. Is there a different
>>>>> development I might have missed so that we might test these cases?
>>>>> Without the UEFI introducing EFI_UNACCEPTED_MEMORY type, any
>>> kernel
>>>>> uses are essentially dead code.
>>>
>>> + Min, Jiaqi.
>>>
>>> I don't follow firmware development. Min, Jiaqi, could you comment?
>>>
>> We have prepared the patch for unaccepted memory and it is now working in our internal release.
>> But there is an obstacle to upstream it to edk2 master branch.
>> The patch-set depends on the definition of UEFI_RESOURCE_MEMORY_UNACCEPTED in PI spec. This is proposed in https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fmicrosoft%2Fmu_basecore%2Fpull%2F66%2Ffiles%23diff-b20a11152d1ce9249c691be5690b4baf52069efadf2e2546cdd2eb663d80c9e4R237&amp;data=05%7C01%7Cpankaj.gupta%40amd.com%7Cde8fd09ad93f4420bd7408da43568f68%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637896336342540814%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=v7s68GZWXJfaXB7vfvXjAlTD2KLOSghk%2Bj3GXF3FTVg%3D&amp;reserved=0, according to UEFI-Code-First. The proposal was approved in 2021 in UEFI Mantis, and will be added to the new PI.next specification. (Till now it has not been added in the latest PI spec.)
>> So UEFI_RESOURCE_MEMORY_UNACCEPTED cannot be added in MdePkg which make it difficult to submit the patch to edk2 community for review. See this link: https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fedk2.groups.io%2Fg%2Fdevel%2Fmessage%2F87558&amp;data=05%7C01%7Cpankaj.gupta%40amd.com%7Cde8fd09ad93f4420bd7408da43568f68%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637896336342540814%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=WVIJ2yRRd2URwIF85Dp0WD4ovibZlsobijIGbN6MWZQ%3D&amp;reserved=0
>>
>> Please be noted: UEFI_RESOURCE_MEMORY_UNACCEPTED (defined in PI spec) is different from EFI_UNACCEPTED_MEMORY (defined in UEFI spec)
>>
>> I will submit the patch-set once the new definition is added in the new PI.next spec.
>>
>> Thanks
>> Min
> 
> 
>
Dionna Amalie Glaze June 1, 2022, 4:20 p.m. UTC | #16
The memory accounting in Linux is probably the issue. Both times I ran
the test were from a freshly booted VM. The test parses the output of
$(free -k) to determine the amount of free memory it should allocate
and write/read from, with a given stride of pages to skip before
touching the next page.

We grab the third column of numbers from the Mem output that looks like this

               total        used        free      shared  buff/cache   available
Mem:        65856604     4128688    48558952       11208    13168964    60942928
Swap:        1953788      118124     1835664

So my workstation has 48558952 free bytes. We take that, give it to
memtouch to allocate that much anonymous memory rounded down to the
nearest MB with mmap and randomly read/write the buffer.

For an 8GB machine, the UEFI will have the initial 0-0xA000 memory and
0x10_0000 to 0xC00_0000 (beginning of mmio hole) prevalidated. The
next 5GB is classified as the UEFI v2.9 memory type
EFI_RESOURCE_MEMORY_UNACCEPTED, 0x1_4000_000 to 0x2_0000_0000.
The Linux e820 map should see that range as unaccepted rather than
EFI_CONVENTIONAL_MEMORY (i.e., EDK2's EFI_RESOURCE_SYSTEM_MEMORY), but
I think it needs to be accounted as free conventional memory.

So when I see 2044MB free vs 7089MB free in my VMs, the two are
roughly 5GB different.

On Wed, Jun 1, 2022 at 8:49 AM Gupta, Pankaj <pankaj.gupta@amd.com> wrote:
>
>
> > Hi y'all, I've made minimal changes to OVMF to prevalidate only up to
> > 4GB and leave the rest unaccepted, as Thomas Lendacky recommended
> > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2FAMDESE%2Fovmf%2Fpull%2F4%23issuecomment-1138606275&amp;data=05%7C01%7Cpankaj.gupta%40amd.com%7Cde8fd09ad93f4420bd7408da43568f68%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637896336342540814%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=K93%2F1FrPOo4bIWcssHoisM8vDkOBjWh69bUWosT%2Bt0E%3D&amp;reserved=0 and ran
> > a memtouch test to see if this change behaves as expected. One thing
> > that struck me is that an 8GB machine reports 2044MB free with this
> > change (free -k) whereas without it, I see 7089MB free. I think that
> > unaccepted memory should be classified as free in meminfo, no? I'm not
> > familiar enough with that code to say what specific change needs to be
> > made.
> >
>
> Is it memory accounting issue when accepting all the memory at boot time
> compared to 4GB:4GB preboot_acceptance:use_time_acceptance split?
>
> You said you ran memtouch (don't know how it works, assuming it uses
> memory)? Doesn't that mean most of the memory used and hence accepted?
> So, free memory reduced?
>
> Just trying to understand the issue.
>
> Thanks,
> Pankaj
> >
> >
> > On Sun, May 15, 2022 at 11:47 PM Xu, Min M <min.m.xu@intel.com> wrote:
> >>
> >> On May 13, 2022 10:45 PM, Kirill A. Shutemov wrote:
> >>> On Fri, May 13, 2022 at 11:01:43AM +0200, Borislav Petkov wrote:
> >>>> + mroth
> >>>> - brijesh
> >>>>
> >>>> On Thu, May 12, 2022 at 10:34:02PM -0700, Dionna Amalie Glaze wrote:
> >>>>> Kirill, I've been tracking these changes to see if we can handle the
> >>>>> unaccepted memory type for SEV-SNP, but testing has been an issue.
> >>>>> The proposed patch in Ovmf to introduce unaccepted memory seems to
> >>>>> have stalled out last September
> >>>>> (https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fwww.mail-archive.com%2Fdevel%40edk2.groups.io%2Fmsg35842.html&amp;data=05%7C01%7Cpankaj.gupta%40amd.com%7Cde8fd09ad93f4420bd7408da43568f68%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637896336342540814%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=Hku8nQJGOg%2FdQqypHxw2eLFG0e%2FE6HoF5VXSIhMpmx0%3D&amp;reserved=0)
> >>>>> and is particularly difficult to adapt to SEV-SNP since it doesn't
> >>>>> follow the TDVF way of initializing all memory. Is there a different
> >>>>> development I might have missed so that we might test these cases?
> >>>>> Without the UEFI introducing EFI_UNACCEPTED_MEMORY type, any
> >>> kernel
> >>>>> uses are essentially dead code.
> >>>
> >>> + Min, Jiaqi.
> >>>
> >>> I don't follow firmware development. Min, Jiaqi, could you comment?
> >>>
> >> We have prepared the patch for unaccepted memory and it is now working in our internal release.
> >> But there is an obstacle to upstream it to edk2 master branch.
> >> The patch-set depends on the definition of UEFI_RESOURCE_MEMORY_UNACCEPTED in PI spec. This is proposed in https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgithub.com%2Fmicrosoft%2Fmu_basecore%2Fpull%2F66%2Ffiles%23diff-b20a11152d1ce9249c691be5690b4baf52069efadf2e2546cdd2eb663d80c9e4R237&amp;data=05%7C01%7Cpankaj.gupta%40amd.com%7Cde8fd09ad93f4420bd7408da43568f68%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637896336342540814%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=v7s68GZWXJfaXB7vfvXjAlTD2KLOSghk%2Bj3GXF3FTVg%3D&amp;reserved=0, according to UEFI-Code-First. The proposal was approved in 2021 in UEFI Mantis, and will be added to the new PI.next specification. (Till now it has not been added in the latest PI spec.)
> >> So UEFI_RESOURCE_MEMORY_UNACCEPTED cannot be added in MdePkg which make it difficult to submit the patch to edk2 community for review. See this link: https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fedk2.groups.io%2Fg%2Fdevel%2Fmessage%2F87558&amp;data=05%7C01%7Cpankaj.gupta%40amd.com%7Cde8fd09ad93f4420bd7408da43568f68%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637896336342540814%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=WVIJ2yRRd2URwIF85Dp0WD4ovibZlsobijIGbN6MWZQ%3D&amp;reserved=0
> >>
> >> Please be noted: UEFI_RESOURCE_MEMORY_UNACCEPTED (defined in PI spec) is different from EFI_UNACCEPTED_MEMORY (defined in UEFI spec)
> >>
> >> I will submit the patch-set once the new definition is added in the new PI.next spec.
> >>
> >> Thanks
> >> Min
> >
> >
> >
>
Randy Dunlap June 1, 2022, 7:34 p.m. UTC | #17
Hi--

On 6/1/22 09:20, Dionna Amalie Glaze wrote:
> The memory accounting in Linux is probably the issue. Both times I ran
> the test were from a freshly booted VM. The test parses the output of
> $(free -k) to determine the amount of free memory it should allocate
> and write/read from, with a given stride of pages to skip before
> touching the next page.
> 
> We grab the third column of numbers from the Mem output that looks like this
> 
>                total        used        free      shared  buff/cache   available
> Mem:        65856604     4128688    48558952       11208    13168964    60942928
> Swap:        1953788      118124     1835664
> 
> So my workstation has 48558952 free bytes. We take that, give it to
> memtouch to allocate that much anonymous memory rounded down to the
> nearest MB with mmap and randomly read/write the buffer.
> 
> For an 8GB machine, the UEFI will have the initial 0-0xA000 memory and
> 0x10_0000 to 0xC00_0000 (beginning of mmio hole) prevalidated. The
> next 5GB is classified as the UEFI v2.9 memory type
> EFI_RESOURCE_MEMORY_UNACCEPTED, 0x1_4000_000 to 0x2_0000_0000.
> The Linux e820 map should see that range as unaccepted rather than
> EFI_CONVENTIONAL_MEMORY (i.e., EDK2's EFI_RESOURCE_SYSTEM_MEMORY), but
> I think it needs to be accounted as free conventional memory.
> 
> So when I see 2044MB free vs 7089MB free in my VMs, the two are
> roughly 5GB different.

Please see/read/use
  https://people.kernel.org/tglx/notes-about-netiquette
Gupta, Pankaj June 1, 2022, 9:19 p.m. UTC | #18
>> The memory accounting in Linux is probably the issue. Both times I ran
>> the test were from a freshly booted VM. The test parses the output of
>> $(free -k) to determine the amount of free memory it should allocate
>> and write/read from, with a given stride of pages to skip before
>> touching the next page.
>>
>> We grab the third column of numbers from the Mem output that looks like this
>>
>>                 total        used        free      shared  buff/cache   available
>> Mem:        65856604     4128688    48558952       11208    13168964    60942928
>> Swap:        1953788      118124     1835664
>>
>> So my workstation has 48558952 free bytes. We take that, give it to
>> memtouch to allocate that much anonymous memory rounded down to the
>> nearest MB with mmap and randomly read/write the buffer.
>>
>> For an 8GB machine, the UEFI will have the initial 0-0xA000 memory and
>> 0x10_0000 to 0xC00_0000 (beginning of mmio hole) prevalidated. The
>> next 5GB is classified as the UEFI v2.9 memory type
>> EFI_RESOURCE_MEMORY_UNACCEPTED, 0x1_4000_000 to 0x2_0000_0000.
>> The Linux e820 map should see that range as unaccepted rather than
>> EFI_CONVENTIONAL_MEMORY (i.e., EDK2's EFI_RESOURCE_SYSTEM_MEMORY), but
>> I think it needs to be accounted as free conventional memory.
>>
>> So when I see 2044MB free vs 7089MB free in my VMs, the two are
>> roughly 5GB different.
> 
> Please see/read/use
...

Apologies, some problem in my email client (appending long characters to 
links), will find and correct.

Thank you for pointing.

Best regards,
Pankaj
Gupta, Pankaj June 2, 2022, 12:51 p.m. UTC | #19
> The memory accounting in Linux is probably the issue. Both times I ran
> the test were from a freshly booted VM. The test parses the output of
> $(free -k) to determine the amount of free memory it should allocate
> and write/read from, with a given stride of pages to skip before
> touching the next page.
> 
> We grab the third column of numbers from the Mem output that looks like this
> 
>                 total        used        free      shared  buff/cache   available
> Mem:        65856604     4128688    48558952       11208    13168964    60942928
> Swap:        1953788      118124     1835664
> 
> So my workstation has 48558952 free bytes. We take that, give it to
> memtouch to allocate that much anonymous memory rounded down to the
> nearest MB with mmap and randomly read/write the buffer.
> 
> For an 8GB machine, the UEFI will have the initial 0-0xA000 memory and
> 0x10_0000 to 0xC00_0000 (beginning of mmio hole) prevalidated. The
> next 5GB is classified as the UEFI v2.9 memory type
> EFI_RESOURCE_MEMORY_UNACCEPTED, 0x1_4000_000 to 0x2_0000_0000.
> The Linux e820 map should see that range as unaccepted rather than
> EFI_CONVENTIONAL_MEMORY (i.e., EDK2's EFI_RESOURCE_SYSTEM_MEMORY), but
> I think it needs to be accounted as free conventional memory.

AFAIU the unaccepted memory also stays in buddy (first via slow path) 
and should be accounted automatically in free?

> 
> So when I see 2044MB free vs 7089MB free in my VMs, the two are
> roughly 5GB different.

Is it possible all memory got allocated with memblock? Maybe some 
variable tests to validate with '/proc/meminfo | grep UnacceptedMem' 
would give you more clue.

Thanks,
Pankaj
Dionna Amalie Glaze June 2, 2022, 3:31 p.m. UTC | #20
On Thu, Jun 2, 2022 at 5:51 AM Gupta, Pankaj <pankaj.gupta@amd.com> wrote:
> AFAIU the unaccepted memory also stays in buddy (first via slow path)
> and should be accounted automatically in free?
>

No, the last patch adds unaccepted mem as a differently accounted memory type.

> >
> > So when I see 2044MB free vs 7089MB free in my VMs, the two are
> > roughly 5GB different.
>
> Is it possible all memory got allocated with memblock? Maybe some
> variable tests to validate with '/proc/meminfo | grep UnacceptedMem'
> would give you more clue.
>

free -k parses /proc/meminfo for MemFree and SwapFree in
/proc/meminfo, so it sounds like it should also add in UnacceptedMem.
We'll try that. Thanks.




--
-Dionna Glaze, PhD (she/her)
Dionna Amalie Glaze June 7, 2022, 5:28 p.m. UTC | #21
> free -k parses /proc/meminfo for MemFree and SwapFree in
> /proc/meminfo, so it sounds like it should also add in UnacceptedMem.
> We'll try that. Thanks.
>

Testing error on my part. Was using an adaptation of an old patch set.
With Brijesh's SEV-SNP support adapted to this version on top of
SEV-SNP guest patch series v12, I have no trouble with passing our
memtouch test.

Reference: https://github.com/deeglaze/amdese-linux/tree/v12unaccepted-v5
Gupta, Pankaj June 7, 2022, 6:15 p.m. UTC | #22
> Testing error on my part. Was using an adaptation of an old patch set.
> With Brijesh's SEV-SNP support adapted to this version on top of
> SEV-SNP guest patch series v12, I have no trouble with passing our
> memtouch test.

Ah Guest unaccepted memory validate patch was missing. Good to know it 
worked for you. Thank you for sharing the result.

Best regards,
Pankaj
diff mbox series

Patch

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 7f672f7e2fea..b59007e57cbf 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -102,7 +102,7 @@  endif
 
 vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
 vmlinux-objs-$(CONFIG_INTEL_TDX_GUEST) += $(obj)/tdx.o $(obj)/tdcall.o
-vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/bitmap.o $(obj)/mem.o
+vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/bitmap.o $(obj)/find.o $(obj)/mem.o
 
 vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
 efi-obj-$(CONFIG_EFI_STUB) = $(objtree)/drivers/firmware/efi/libstub/lib.a
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 411b268bc0a2..59db90626042 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -725,10 +725,20 @@  process_efi_entries(unsigned long minimum, unsigned long image_size)
 		 * but in practice there's firmware where using that memory leads
 		 * to crashes.
 		 *
-		 * Only EFI_CONVENTIONAL_MEMORY is guaranteed to be free.
+		 * Only EFI_CONVENTIONAL_MEMORY and EFI_UNACCEPTED_MEMORY (if
+		 * supported) are guaranteed to be free.
 		 */
-		if (md->type != EFI_CONVENTIONAL_MEMORY)
+
+		switch (md->type) {
+		case EFI_CONVENTIONAL_MEMORY:
+			break;
+		case EFI_UNACCEPTED_MEMORY:
+			if (IS_ENABLED(CONFIG_UNACCEPTED_MEMORY))
+				break;
 			continue;
+		default:
+			continue;
+		}
 
 		if (efi_soft_reserve_enabled() &&
 		    (md->attribute & EFI_MEMORY_SP))
diff --git a/arch/x86/boot/compressed/mem.c b/arch/x86/boot/compressed/mem.c
index 415df0d3bc81..b5058c975d26 100644
--- a/arch/x86/boot/compressed/mem.c
+++ b/arch/x86/boot/compressed/mem.c
@@ -3,12 +3,15 @@ 
 #include "../cpuflags.h"
 #include "bitmap.h"
 #include "error.h"
+#include "find.h"
 #include "math.h"
 
 #define PMD_SHIFT	21
 #define PMD_SIZE	(_AC(1, UL) << PMD_SHIFT)
 #define PMD_MASK	(~(PMD_SIZE - 1))
 
+extern struct boot_params *boot_params;
+
 static inline void __accept_memory(phys_addr_t start, phys_addr_t end)
 {
 	/* Platform-specific memory-acceptance call goes here */
@@ -66,3 +69,21 @@  void process_unaccepted_memory(struct boot_params *params, u64 start, u64 end)
 	bitmap_set((unsigned long *)params->unaccepted_memory,
 		   start / PMD_SIZE, (end - start) / PMD_SIZE);
 }
+
+void accept_memory(phys_addr_t start, phys_addr_t end)
+{
+	unsigned long range_start, range_end;
+	unsigned long *unaccepted_memory;
+	unsigned long bitmap_size;
+
+	unaccepted_memory = (unsigned long *)boot_params->unaccepted_memory;
+	range_start = start / PMD_SIZE;
+	bitmap_size = DIV_ROUND_UP(end, PMD_SIZE);
+
+	for_each_set_bitrange_from(range_start, range_end,
+				   unaccepted_memory, bitmap_size) {
+		__accept_memory(range_start * PMD_SIZE, range_end * PMD_SIZE);
+		bitmap_clear(unaccepted_memory,
+			     range_start, range_end - range_start);
+	}
+}
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index fa8969fad011..285b37e28074 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -18,6 +18,7 @@ 
 #include "../string.h"
 #include "../voffset.h"
 #include <asm/bootparam_utils.h>
+#include <asm/unaccepted_memory.h>
 
 /*
  * WARNING!!
@@ -451,6 +452,14 @@  asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 #endif
 
 	debug_putstr("\nDecompressing Linux... ");
+
+#ifdef CONFIG_UNACCEPTED_MEMORY
+	if (boot_params->unaccepted_memory) {
+		debug_putstr("Accepting memory... ");
+		accept_memory(__pa(output), __pa(output) + needed_size);
+	}
+#endif
+
 	__decompress(input_data, input_len, NULL, NULL, output, output_len,
 			NULL, error);
 	parse_elf(output);
diff --git a/arch/x86/include/asm/unaccepted_memory.h b/arch/x86/include/asm/unaccepted_memory.h
index df0736d32858..41fbfc798100 100644
--- a/arch/x86/include/asm/unaccepted_memory.h
+++ b/arch/x86/include/asm/unaccepted_memory.h
@@ -7,4 +7,6 @@  struct boot_params;
 
 void process_unaccepted_memory(struct boot_params *params, u64 start, u64 num);
 
+void accept_memory(phys_addr_t start, phys_addr_t end);
+
 #endif