diff mbox series

x86/microcode: Support builtin CPU microcode

Message ID 20191209084119.87563-1-elnikety@amazon.com (mailing list archive)
State Superseded
Headers show
Series x86/microcode: Support builtin CPU microcode | expand

Commit Message

Eslam Elnikety Dec. 9, 2019, 8:41 a.m. UTC
Xen relies on boot modules to perform early microcode updates. This commit adds
another mode, namely "builtin" via the BUILTIN_UCODE config parameter. If set,
the Xen image itself will contain the microcode updates. Upon boot, Xen
inspects its image for microcode blobs and performs the update.

A Xen image with builtin microcode can be explicitly instructed to:
    (a) look for microcode elsewhere (e.g., a boot module that contains more
        recent microcodes via ucode=scan), or
    (b) skip the builtin microcode update (e.g., ucode=no-builtin).

Signed-off-by: Eslam Elnikety <elnikety@amazon.com>
---
 docs/misc/builtin-ucode.txt       | 60 +++++++++++++++++++++++++++++++
 docs/misc/xen-command-line.pandoc |  5 ++-
 xen/arch/x86/Kconfig              | 20 +++++++++++
 xen/arch/x86/Makefile             |  1 +
 xen/arch/x86/microcode.c          | 60 +++++++++++++++++++++++++++++--
 xen/arch/x86/microcode/Makefile   | 40 +++++++++++++++++++++
 xen/arch/x86/xen.lds.S            | 12 +++++++
 7 files changed, 194 insertions(+), 4 deletions(-)
 create mode 100644 docs/misc/builtin-ucode.txt
 create mode 100644 xen/arch/x86/microcode/Makefile

Comments

Andrew Cooper Dec. 9, 2019, 3:19 p.m. UTC | #1
On 09/12/2019 08:41, Eslam Elnikety wrote:
> diff --git a/docs/misc/builtin-ucode.txt b/docs/misc/builtin-ucode.txt
> new file mode 100644
> index 0000000000..43bb60d3eb

Instead of introducing a new file, please extend
docs/admin-guide/microcode-loading.rst

I have an in-prep docs/hypervisor-guide/microcode-loading.rst as well,
which I'll see about posting.  It would be a more appropriate place for
the technical details.

> diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
> index 6ced293d88..7afbe44286 100644
> --- a/xen/arch/x86/microcode.c
> +++ b/xen/arch/x86/microcode.c
> @@ -97,6 +97,14 @@ static struct ucode_mod_blob __initdata ucode_blob;
>   */
>  static bool_t __initdata ucode_scan;
>  
> +#ifdef CONFIG_BUILTIN_UCODE
> +/* builtin is the default when BUILTIN_UCODE is set */
> +static bool_t __initdata ucode_builtin = 1;

bool, true.

> +
> +extern const char __builtin_intel_ucode_start[], __builtin_intel_ucode_end[];
> +extern const char __builtin_amd_ucode_start[], __builtin_amd_ucode_end[];
> +#endif
> +
>  /* By default, ucode loading is done in NMI handler */
>  static bool ucode_in_nmi = true;
>  
> @@ -110,9 +118,9 @@ void __init microcode_set_module(unsigned int idx)
>  }
>  
>  /*
> - * The format is '[<integer>|scan=<bool>, nmi=<bool>]'. Both options are
> - * optional. If the EFI has forced which of the multiboot payloads is to be
> - * used, only nmi=<bool> is parsed.
> + * The format is '[<integer>|scan=<bool>|builtin=<bool>, nmi=<bool>]'. All
> + * options are optional. If the EFI has forced which of the multiboot payloads
> + * is to be used, only nmi=<bool> is parsed.
>   */

Please delete this, or I'll do a prereq patch to fix it and the command
line docs.  (Both are in a poor state.)

> @@ -237,6 +249,48 @@ void __init microcode_grab_module(
>  scan:
>      if ( ucode_scan )
>          microcode_scan_module(module_map, mbi);
> +
> +#ifdef CONFIG_BUILTIN_UCODE
> +    /*
> +     * Do not use the builtin microcode if:
> +     * (a) builtin has been explicitly turned off (e.g., ucode=no-builtin)
> +     * (b) a microcode module has been specified or a scan is successful
> +     */
> +    if ( !ucode_builtin || ucode_mod.mod_end || ucode_blob.size )
> +        return;
> +
> +    /* Set ucode_start/_end to the proper blob */
> +    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
> +        ucode_blob.size = (size_t)(__builtin_amd_ucode_end
> +                                   - __builtin_amd_ucode_start);

No need to cast the result.  Also, preferred Xen style would be to have
- on the preceding line.

> +    else if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
> +        ucode_blob.size = (size_t)(__builtin_intel_ucode_end
> +                                   - __builtin_intel_ucode_start);
> +    else
> +        return;
> +
> +    if ( !ucode_blob.size )
> +    {
> +        printk("No builtin ucode! 'ucode=builtin' is nullified.\n");
> +        return;
> +    }
> +    else if ( ucode_blob.size > MAX_EARLY_CPIO_MICROCODE )
> +    {
> +        printk("Builtin microcode payload too big! (%ld, we can do %d)\n",
> +               ucode_blob.size, MAX_EARLY_CPIO_MICROCODE);
> +        ucode_blob.size = 0;
> +        return;
> +    }
> +
> +    ucode_blob.data = xmalloc_bytes(ucode_blob.size);
> +    if ( !ucode_blob.data )
> +        return;

Any chance we can reuse the "fits" logic to avoid holding every
inapplicable blob in memory as well?

> +
> +    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
> +        memcpy(ucode_blob.data, __builtin_amd_ucode_start, ucode_blob.size);
> +    else
> +        memcpy(ucode_blob.data, __builtin_intel_ucode_start, ucode_blob.size);
> +#endif
>  }
>  
>  const struct microcode_ops *microcode_ops;
> diff --git a/xen/arch/x86/microcode/Makefile b/xen/arch/x86/microcode/Makefile
> new file mode 100644
> index 0000000000..6d585c5482
> --- /dev/null
> +++ b/xen/arch/x86/microcode/Makefile
> @@ -0,0 +1,40 @@
> +# Copyright (C) 2019 Amazon.com, Inc. or its affiliates.
> +# Author: Eslam Elnikety <elnikety@amazon.com>
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +
> +obj-y += builtin_ucode.o
> +
> +# Directory holding the microcode updates.
> +UCODE_DIR=$(patsubst "%",%,$(CONFIG_BUILTIN_UCODE_DIR))
> +amd-blobs := $(wildcard $(UCODE_DIR)/amd-ucode/*)
> +intel-blobs := $(wildcard $(UCODE_DIR)/intel-ucode/*)

This is a little dangerous.  I can see why you want to do it like this,
and I can't provide any obvious suggestions, but if this glob picks up
anything which isn't a microcode file, it will break the logic to search
for the right blob.

> +
> +builtin_ucode.o: Makefile $(amd-blobs) $(intel-blobs)
> +	# Create AMD microcode blob if there are AMD updates on the build system
> +	if [ ! -z "$(amd-blobs)" ]; then \
> +		cat $(amd-blobs) > $@.bin ; \
> +		$(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64 --rename-section .data=.builtin_amd_ucode,alloc,load,readonly,data,contents $@.bin $@.amd; \
> +		rm -f $@.bin; \
> +	fi
> +	# Create INTEL microcode blob if there are INTEL updates on the build system
> +	if [ ! -z "$(intel-blobs)" ]; then \
> +		cat $(intel-blobs) > $@.bin; \
> +		$(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64 --rename-section .data=.builtin_intel_ucode,alloc,load,readonly,data,contents $@.bin $@.intel; \
> +		rm -f $@.bin; \
> +	fi
> +	# Create fake builtin_ucode.o if no updates were present. Otherwise, builtin_ucode.o carries the available updates
> +	if [ -z "$(amd-blobs)" -a -z "$(intel-blobs)" ]; then \
> +		$(CC) $(CFLAGS) -c -x c /dev/null -o $@; \
> +	else \
> +		$(LD) $(LDFLAGS) -r -o $@ $@.*; \
> +		rm -f $@.*; \
> +	fi

How about using weak symbols, rather than playing games like this?

~Andrew
Eslam Elnikety Dec. 9, 2019, 9:49 p.m. UTC | #2
On 09.12.19 16:19, Andrew Cooper wrote:
> On 09/12/2019 08:41, Eslam Elnikety wrote:
>> diff --git a/docs/misc/builtin-ucode.txt b/docs/misc/builtin-ucode.txt
>> new file mode 100644
>> index 0000000000..43bb60d3eb
> 
> Instead of introducing a new file, please extend
> docs/admin-guide/microcode-loading.rst
> 
> I have an in-prep docs/hypervisor-guide/microcode-loading.rst as well,
> which I'll see about posting.  It would be a more appropriate place for
> the technical details.
> 

Agreed!

While the existing docs/admin-guide/microcode-loading.rst speaks a 
different tone than what I added, that documentation anyway needs to be 
updated with builtin if such support were to be added. I will adjust 
accordingly. If docs/hypervisor-guide/microcode-loading.rst makes it in 
time for v2 of this patch, I will reflect changes there too.

>> diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
>> index 6ced293d88..7afbe44286 100644
>> --- a/xen/arch/x86/microcode.c
>> +++ b/xen/arch/x86/microcode.c
>> @@ -97,6 +97,14 @@ static struct ucode_mod_blob __initdata ucode_blob;
>>    */
>>   static bool_t __initdata ucode_scan;
>>   
>> +#ifdef CONFIG_BUILTIN_UCODE
>> +/* builtin is the default when BUILTIN_UCODE is set */
>> +static bool_t __initdata ucode_builtin = 1;
> 
> bool, true.
> 

Will fix in v2.

>> +
>> +extern const char __builtin_intel_ucode_start[], __builtin_intel_ucode_end[];
>> +extern const char __builtin_amd_ucode_start[], __builtin_amd_ucode_end[];
>> +#endif
>> +
>>   /* By default, ucode loading is done in NMI handler */
>>   static bool ucode_in_nmi = true;
>>   
>> @@ -110,9 +118,9 @@ void __init microcode_set_module(unsigned int idx)
>>   }
>>   
>>   /*
>> - * The format is '[<integer>|scan=<bool>, nmi=<bool>]'. Both options are
>> - * optional. If the EFI has forced which of the multiboot payloads is to be
>> - * used, only nmi=<bool> is parsed.
>> + * The format is '[<integer>|scan=<bool>|builtin=<bool>, nmi=<bool>]'. All
>> + * options are optional. If the EFI has forced which of the multiboot payloads
>> + * is to be used, only nmi=<bool> is parsed.
>>    */
> 
> Please delete this, or I'll do a prereq patch to fix it and the command
> line docs.  (Both are in a poor state.)
> 

Unless you are planning that along your on-going 
docs/hypervisor-guide/microcode-loading.rst effort, I can pick up this 
clean-up/prereq patch myself. What do you have in mind? (Or point me to 
a good example and I will figure things out).

>> @@ -237,6 +249,48 @@ void __init microcode_grab_module(
>>   scan:
>>       if ( ucode_scan )
>>           microcode_scan_module(module_map, mbi);
>> +
>> +#ifdef CONFIG_BUILTIN_UCODE
>> +    /*
>> +     * Do not use the builtin microcode if:
>> +     * (a) builtin has been explicitly turned off (e.g., ucode=no-builtin)
>> +     * (b) a microcode module has been specified or a scan is successful
>> +     */
>> +    if ( !ucode_builtin || ucode_mod.mod_end || ucode_blob.size )
>> +        return;
>> +
>> +    /* Set ucode_start/_end to the proper blob */
>> +    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
>> +        ucode_blob.size = (size_t)(__builtin_amd_ucode_end
>> +                                   - __builtin_amd_ucode_start);
> 
> No need to cast the result.  Also, preferred Xen style would be to have
> - on the preceding line.
> 

Will fix in v2.

>> +    else if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
>> +        ucode_blob.size = (size_t)(__builtin_intel_ucode_end
>> +                                   - __builtin_intel_ucode_start);
>> +    else
>> +        return;
>> +
>> +    if ( !ucode_blob.size )
>> +    {
>> +        printk("No builtin ucode! 'ucode=builtin' is nullified.\n");
>> +        return;
>> +    }
>> +    else if ( ucode_blob.size > MAX_EARLY_CPIO_MICROCODE )
>> +    {
>> +        printk("Builtin microcode payload too big! (%ld, we can do %d)\n",
>> +               ucode_blob.size, MAX_EARLY_CPIO_MICROCODE);
>> +        ucode_blob.size = 0;
>> +        return;
>> +    }
>> +
>> +    ucode_blob.data = xmalloc_bytes(ucode_blob.size);
>> +    if ( !ucode_blob.data )
>> +        return;
> 
> Any chance we can reuse the "fits" logic to avoid holding every
> inapplicable blob in memory as well?
> 

I think this would be a welcomed change. It seems to me that we have two 
ways to go about it.

1) We factor the code in the intel-/amd-specific cpu_request_microcode 
to extract logic for finding a match into its own new function, expose 
that through microcode_ops, and finally do xalloc only for the matching 
microcode when early loading is scan or builtin.

2) Cannot we just do away completely with xalloc? I see that each 
individual microcode update gets allocated anyway in 
microcode_intel.c/get_next_ucode_from_buffer() and in 
microcode_amd.c/cpu_request_microcode(). Unless I am missing something, 
the xmalloc_bytes for ucode_blob.data is redundant.

Thoughts?

>> +
>> +    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
>> +        memcpy(ucode_blob.data, __builtin_amd_ucode_start, ucode_blob.size);
>> +    else
>> +        memcpy(ucode_blob.data, __builtin_intel_ucode_start, ucode_blob.size);
>> +#endif
>>   }
>>   
>>   const struct microcode_ops *microcode_ops;
>> diff --git a/xen/arch/x86/microcode/Makefile b/xen/arch/x86/microcode/Makefile
>> new file mode 100644
>> index 0000000000..6d585c5482
>> --- /dev/null
>> +++ b/xen/arch/x86/microcode/Makefile
>> @@ -0,0 +1,40 @@
>> +# Copyright (C) 2019 Amazon.com, Inc. or its affiliates.
>> +# Author: Eslam Elnikety <elnikety@amazon.com>
>> +#
>> +# This program is free software; you can redistribute it and/or modify
>> +# it under the terms of the GNU General Public License as published by
>> +# the Free Software Foundation; either version 2 of the License, or
>> +# (at your option) any later version.
>> +#
>> +# This program is distributed in the hope that it will be useful,
>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> +# GNU General Public License for more details.
>> +
>> +obj-y += builtin_ucode.o
>> +
>> +# Directory holding the microcode updates.
>> +UCODE_DIR=$(patsubst "%",%,$(CONFIG_BUILTIN_UCODE_DIR))
>> +amd-blobs := $(wildcard $(UCODE_DIR)/amd-ucode/*)
>> +intel-blobs := $(wildcard $(UCODE_DIR)/intel-ucode/*)
> 
> This is a little dangerous.  I can see why you want to do it like this,
> and I can't provide any obvious suggestions, but if this glob picks up
> anything which isn't a microcode file, it will break the logic to search
> for the right blob.
> 

We can limit the amd-blobs and intel-blob to binaries following the 
naming convention AuthenticAMD.bin and GenuineIntel.bin for amd and 
intel, respectively. Yet, this would be imposing an unnecessary 
restriction on administrators who may want to be innovative with naming 
(or want to use the naming microcode_amd_*.bin or FF-MM-SS instead).

Alternatively, we can introduce CONFIG_BUILTIN_UCODE_INTEL and 
CONFIG_BUILTIN_UCODE_AMD. Both default to empty strings. Then, an 
administrator can specify exactly the microcodes to include relative to 
the CONFIG_BUILTIN_UCODE_DIR. For example:
CONFIG_BUILTIN_UCODE_INTEL="intel-ucode/06-3a-09"
CONFIG_BUILTIN_UCODE_AMD="amd-ucode/microcode_amd_fam15h.bin"

Thoughts?

>> +
>> +builtin_ucode.o: Makefile $(amd-blobs) $(intel-blobs)
>> +	# Create AMD microcode blob if there are AMD updates on the build system
>> +	if [ ! -z "$(amd-blobs)" ]; then \
>> +		cat $(amd-blobs) > $@.bin ; \
>> +		$(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64 --rename-section .data=.builtin_amd_ucode,alloc,load,readonly,data,contents $@.bin $@.amd; \
>> +		rm -f $@.bin; \
>> +	fi
>> +	# Create INTEL microcode blob if there are INTEL updates on the build system
>> +	if [ ! -z "$(intel-blobs)" ]; then \
>> +		cat $(intel-blobs) > $@.bin; \
>> +		$(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64 --rename-section .data=.builtin_intel_ucode,alloc,load,readonly,data,contents $@.bin $@.intel; \
>> +		rm -f $@.bin; \
>> +	fi
>> +	# Create fake builtin_ucode.o if no updates were present. Otherwise, builtin_ucode.o carries the available updates
>> +	if [ -z "$(amd-blobs)" -a -z "$(intel-blobs)" ]; then \
>> +		$(CC) $(CFLAGS) -c -x c /dev/null -o $@; \
>> +	else \
>> +		$(LD) $(LDFLAGS) -r -o $@ $@.*; \
>> +		rm -f $@.*; \
>> +	fi
> 
> How about using weak symbols, rather than playing games like this?

Just to make sure we are on the same page. You are after a dummy binary 
with weak symbols that eventually get overridden when I link the actual 
microcode binaries into builtin_ucode.o? If so, possible of course. 
Except that I do not particularly see the downside of the existing 
approach with dummy builtin_ucode.o.

> 
> ~Andrew
> 

Thanks a lot for those insightful comments, Andrew.

Cheers,
Eslam
Jan Beulich Dec. 10, 2019, 9:21 a.m. UTC | #3
On 09.12.2019 22:49, Eslam Elnikety wrote:
> On 09.12.19 16:19, Andrew Cooper wrote:
>> On 09/12/2019 08:41, Eslam Elnikety wrote:
>>> --- /dev/null
>>> +++ b/xen/arch/x86/microcode/Makefile
>>> @@ -0,0 +1,40 @@
>>> +# Copyright (C) 2019 Amazon.com, Inc. or its affiliates.
>>> +# Author: Eslam Elnikety <elnikety@amazon.com>
>>> +#
>>> +# This program is free software; you can redistribute it and/or modify
>>> +# it under the terms of the GNU General Public License as published by
>>> +# the Free Software Foundation; either version 2 of the License, or
>>> +# (at your option) any later version.
>>> +#
>>> +# This program is distributed in the hope that it will be useful,
>>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>> +# GNU General Public License for more details.
>>> +
>>> +obj-y += builtin_ucode.o
>>> +
>>> +# Directory holding the microcode updates.
>>> +UCODE_DIR=$(patsubst "%",%,$(CONFIG_BUILTIN_UCODE_DIR))
>>> +amd-blobs := $(wildcard $(UCODE_DIR)/amd-ucode/*)
>>> +intel-blobs := $(wildcard $(UCODE_DIR)/intel-ucode/*)
>>
>> This is a little dangerous.  I can see why you want to do it like this,
>> and I can't provide any obvious suggestions, but if this glob picks up
>> anything which isn't a microcode file, it will break the logic to search
>> for the right blob.
>>
> 
> We can limit the amd-blobs and intel-blob to binaries following the 
> naming convention AuthenticAMD.bin and GenuineIntel.bin for amd and 
> intel, respectively. Yet, this would be imposing an unnecessary 
> restriction on administrators who may want to be innovative with naming 
> (or want to use the naming microcode_amd_*.bin or FF-MM-SS instead).
> 
> Alternatively, we can introduce CONFIG_BUILTIN_UCODE_INTEL and 
> CONFIG_BUILTIN_UCODE_AMD. Both default to empty strings. Then, an 
> administrator can specify exactly the microcodes to include relative to 
> the CONFIG_BUILTIN_UCODE_DIR. For example:
> CONFIG_BUILTIN_UCODE_INTEL="intel-ucode/06-3a-09"
> CONFIG_BUILTIN_UCODE_AMD="amd-ucode/microcode_amd_fam15h.bin"

This would make the feature even less generic - I already meant to
ask whether building ucode into binaries is really a useful thing
when we already have more flexible ways. I could see this being
useful if there was no other way to make ucode available at boot
time.

Jan
Jan Beulich Dec. 10, 2019, 9:37 a.m. UTC | #4
On 09.12.2019 09:41, Eslam Elnikety wrote:
> --- a/docs/misc/xen-command-line.pandoc
> +++ b/docs/misc/xen-command-line.pandoc
> @@ -2113,7 +2113,7 @@ logic applies:
>     active by default.
>  
>  ### ucode (x86)
> -> `= List of [ <integer> | scan=<bool>, nmi=<bool> ]`
> +> `= List of [ <integer> | scan=<bool> | builtin=<bool>, nmi=<bool> ]`

Despite my other question regarding the usefulness of this as a
whole a few comments.

Do "scan" and "builtin" really need to exclude each other? I.e.
don't you mean , instead of | ?

> @@ -2128,6 +2128,9 @@ when used with xen.efi (there the concept of modules doesn't exist, and
>  the blob gets specified via the `ucode=<filename>` config file/section
>  entry; see [EFI configuration file description](efi.html)).
>  
> +'builtin' instructs the hypervisor to use the builtin microcode update. This
> +option is available only if option BUILTIN_UCODE is enabled.

You also want to clarify its default - your reply to Andrew
suggested to me that only the negative form would really be
useful.

> --- a/xen/arch/x86/Kconfig
> +++ b/xen/arch/x86/Kconfig
> @@ -218,6 +218,26 @@ config MEM_SHARING
>  	bool "Xen memory sharing support" if EXPERT = "y"
>  	depends on HVM
>  
> +config BUILTIN_UCODE
> +	def_bool n
> +	prompt "Support for Builtin Microcode"

These two lines should be folded into just

	bool "Support for Builtin Microcode"

irrespective of other bad examples you may find in the code base.
The again ...

> +	---help---
> +	  Include the CPU microcode update in the Xen image itself. With this
> +	  support, Xen can update the CPU microcode upon boot using the builtin
> +	  microcode, with no need for an additional microcode boot modules.
> +
> +	  If unsure, say N.
> +
> +config BUILTIN_UCODE_DIR
> +	string
> +	default "/lib/firmware"
> +	depends on BUILTIN_UCODE

... are two separate options needed at all? Can't this latter one
being the empty string just imply the feature to be disabled?

> --- a/xen/arch/x86/Makefile
> +++ b/xen/arch/x86/Makefile
> @@ -7,6 +7,7 @@ subdir-y += mm
>  subdir-$(CONFIG_XENOPROF) += oprofile
>  subdir-$(CONFIG_PV) += pv
>  subdir-y += x86_64
> +subdir-$(CONFIG_BUILTIN_UCODE) += microcode

Please respect the (half way?) alphabetical sorting here, unless
adding last is a requirement (in which case a brief comment should
say so, and why).

> @@ -130,6 +138,10 @@ static int __init parse_ucode(const char *s)
>          {
>              if ( (val = parse_boolean("scan", s, ss)) >= 0 )
>                  ucode_scan = val;
> +#ifdef CONFIG_BUILTIN_UCODE
> +	    else if ( (val = parse_boolean("builtin", s, ss)) >= 0 )

Please watch out for hard tabs where they don't belong.

> @@ -237,6 +249,48 @@ void __init microcode_grab_module(
>  scan:
>      if ( ucode_scan )
>          microcode_scan_module(module_map, mbi);
> +
> +#ifdef CONFIG_BUILTIN_UCODE
> +    /*
> +     * Do not use the builtin microcode if:
> +     * (a) builtin has been explicitly turned off (e.g., ucode=no-builtin)
> +     * (b) a microcode module has been specified or a scan is successful
> +     */
> +    if ( !ucode_builtin || ucode_mod.mod_end || ucode_blob.size )
> +        return;
> +
> +    /* Set ucode_start/_end to the proper blob */
> +    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
> +        ucode_blob.size = (size_t)(__builtin_amd_ucode_end
> +                                   - __builtin_amd_ucode_start);
> +    else if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
> +        ucode_blob.size = (size_t)(__builtin_intel_ucode_end
> +                                   - __builtin_intel_ucode_start);
> +    else
> +        return;
> +
> +    if ( !ucode_blob.size )
> +    {
> +        printk("No builtin ucode! 'ucode=builtin' is nullified.\n");
> +        return;
> +    }
> +    else if ( ucode_blob.size > MAX_EARLY_CPIO_MICROCODE )

With the "return" above please omit the "else" here. But why this
restriction, and ...

> +    {
> +        printk("Builtin microcode payload too big! (%ld, we can do %d)\n",
> +               ucode_blob.size, MAX_EARLY_CPIO_MICROCODE);
> +        ucode_blob.size = 0;
> +        return;
> +    }
> +
> +    ucode_blob.data = xmalloc_bytes(ucode_blob.size);
> +    if ( !ucode_blob.data )
> +        return;
> +
> +    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
> +        memcpy(ucode_blob.data, __builtin_amd_ucode_start, ucode_blob.size);
> +    else
> +        memcpy(ucode_blob.data, __builtin_intel_ucode_start, ucode_blob.size);

... why the copying? Can't you simply point ucode_blob.data at
__builtin_{amd,intel}_ucode_start?

Jan
Eslam Elnikety Dec. 10, 2019, 10:40 p.m. UTC | #5
On 10.12.19 10:21, Jan Beulich wrote:
> On 09.12.2019 22:49, Eslam Elnikety wrote:
>> On 09.12.19 16:19, Andrew Cooper wrote:
>>> On 09/12/2019 08:41, Eslam Elnikety wrote:
>>>> --- /dev/null
>>>> +++ b/xen/arch/x86/microcode/Makefile
>>>> @@ -0,0 +1,40 @@
>>>> +# Copyright (C) 2019 Amazon.com, Inc. or its affiliates.
>>>> +# Author: Eslam Elnikety <elnikety@amazon.com>
>>>> +#
>>>> +# This program is free software; you can redistribute it and/or modify
>>>> +# it under the terms of the GNU General Public License as published by
>>>> +# the Free Software Foundation; either version 2 of the License, or
>>>> +# (at your option) any later version.
>>>> +#
>>>> +# This program is distributed in the hope that it will be useful,
>>>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>>> +# GNU General Public License for more details.
>>>> +
>>>> +obj-y += builtin_ucode.o
>>>> +
>>>> +# Directory holding the microcode updates.
>>>> +UCODE_DIR=$(patsubst "%",%,$(CONFIG_BUILTIN_UCODE_DIR))
>>>> +amd-blobs := $(wildcard $(UCODE_DIR)/amd-ucode/*)
>>>> +intel-blobs := $(wildcard $(UCODE_DIR)/intel-ucode/*)
>>>
>>> This is a little dangerous.  I can see why you want to do it like this,
>>> and I can't provide any obvious suggestions, but if this glob picks up
>>> anything which isn't a microcode file, it will break the logic to search
>>> for the right blob.
>>>
>>
>> We can limit the amd-blobs and intel-blob to binaries following the
>> naming convention AuthenticAMD.bin and GenuineIntel.bin for amd and
>> intel, respectively. Yet, this would be imposing an unnecessary
>> restriction on administrators who may want to be innovative with naming
>> (or want to use the naming microcode_amd_*.bin or FF-MM-SS instead).
>>
>> Alternatively, we can introduce CONFIG_BUILTIN_UCODE_INTEL and
>> CONFIG_BUILTIN_UCODE_AMD. Both default to empty strings. Then, an
>> administrator can specify exactly the microcodes to include relative to
>> the CONFIG_BUILTIN_UCODE_DIR. For example:
>> CONFIG_BUILTIN_UCODE_INTEL="intel-ucode/06-3a-09"
>> CONFIG_BUILTIN_UCODE_AMD="amd-ucode/microcode_amd_fam15h.bin"
> 
> This would make the feature even less generic - I already meant to

I do not follow the point about being less generic. (I hope my example 
did not give the false impression that CONFIG_BUILTIN_UCODE_{AMD,INTEL} 
allow for only a single microcode blob for a single signature).

> ask whether building ucode into binaries is really a useful thing
> when we already have more flexible ways. I could see this being
> useful if there was no other way to make ucode available at boot
> time.

It is useful in addition to the existing ways to do early microcode 
updates. First, when operating many hosts, using boot modules (either a 
distinct microcode module or within an initrd) becomes involved. For 
instance, tools to update boot entries (e.g., 
https://linux.die.net/man/8/grubby) do not support adding arbitrary 
(microcode) modules.

Second, there is often need to couple a Xen build with a minimum 
microcode patch level. Having the microcode built within the Xen image 
itself is a streamlined, natural way of achieving that.

Thanks,
Eslam

> 
> Jan
>
Eslam Elnikety Dec. 10, 2019, 11:18 p.m. UTC | #6
On 10.12.19 10:37, Jan Beulich wrote:
> On 09.12.2019 09:41, Eslam Elnikety wrote:
>> --- a/docs/misc/xen-command-line.pandoc
>> +++ b/docs/misc/xen-command-line.pandoc
>> @@ -2113,7 +2113,7 @@ logic applies:
>>      active by default.
>>   
>>   ### ucode (x86)
>> -> `= List of [ <integer> | scan=<bool>, nmi=<bool> ]`
>> +> `= List of [ <integer> | scan=<bool> | builtin=<bool>, nmi=<bool> ]`
> 
> Despite my other question regarding the usefulness of this as a
> whole a few comments.
> 
> Do "scan" and "builtin" really need to exclude each other? I.e.
> don't you mean , instead of | ?
The useful case here would be specifying ucode=scan,builtin which would 
translate to fallback onto the builtin microcode if a scan fails. In 
fact, this is already the case given the implementation in v1. It will 
be better to clarify this semantic by allowing scan,builtin.

On that note, I really see the "<integer>" and "scan=<bool>" to be 
equal. Following the logic earlier we should probably also allow 
ucode=<integer>,builtin. This translates to fallback to builtin if there 
are no modules at index <integer>.

> 
>> @@ -2128,6 +2128,9 @@ when used with xen.efi (there the concept of modules doesn't exist, and
>>   the blob gets specified via the `ucode=<filename>` config file/section
>>   entry; see [EFI configuration file description](efi.html)).
>>   
>> +'builtin' instructs the hypervisor to use the builtin microcode update. This
>> +option is available only if option BUILTIN_UCODE is enabled.
> 
> You also want to clarify its default - your reply to Andrew
> suggested to me that only the negative form would really be
> useful.
> 

Indeed. This in any case will need a revamp to rework the ", instead of 
|". Will address in v2.

>> --- a/xen/arch/x86/Kconfig
>> +++ b/xen/arch/x86/Kconfig
>> @@ -218,6 +218,26 @@ config MEM_SHARING
>>   	bool "Xen memory sharing support" if EXPERT = "y"
>>   	depends on HVM
>>   
>> +config BUILTIN_UCODE
>> +	def_bool n
>> +	prompt "Support for Builtin Microcode"
> 
> These two lines should be folded into just
> 
> 	bool "Support for Builtin Microcode"
> 
> irrespective of other bad examples you may find in the code base.
> The again ...
> 

Will address in v2.

>> +	---help---
>> +	  Include the CPU microcode update in the Xen image itself. With this
>> +	  support, Xen can update the CPU microcode upon boot using the builtin
>> +	  microcode, with no need for an additional microcode boot modules.
>> +
>> +	  If unsure, say N.
>> +
>> +config BUILTIN_UCODE_DIR
>> +	string
>> +	default "/lib/firmware"
>> +	depends on BUILTIN_UCODE
> 
> ... are two separate options needed at all? Can't this latter one
> being the empty string just imply the feature to be disabled?
> 

I can go either way here. To me, two options is clearer.

>> --- a/xen/arch/x86/Makefile
>> +++ b/xen/arch/x86/Makefile
>> @@ -7,6 +7,7 @@ subdir-y += mm
>>   subdir-$(CONFIG_XENOPROF) += oprofile
>>   subdir-$(CONFIG_PV) += pv
>>   subdir-y += x86_64
>> +subdir-$(CONFIG_BUILTIN_UCODE) += microcode
> 
> Please respect the (half way?) alphabetical sorting here, unless
> adding last is a requirement (in which case a brief comment should
> say so, and why).
> 
>> @@ -130,6 +138,10 @@ static int __init parse_ucode(const char *s)
>>           {
>>               if ( (val = parse_boolean("scan", s, ss)) >= 0 )
>>                   ucode_scan = val;
>> +#ifdef CONFIG_BUILTIN_UCODE
>> +	    else if ( (val = parse_boolean("builtin", s, ss)) >= 0 )
> 
> Please watch out for hard tabs where they don't belong.
> 

Good catch! Will fix in v2.

>> @@ -237,6 +249,48 @@ void __init microcode_grab_module(
>>   scan:
>>       if ( ucode_scan )
>>           microcode_scan_module(module_map, mbi);
>> +
>> +#ifdef CONFIG_BUILTIN_UCODE
>> +    /*
>> +     * Do not use the builtin microcode if:
>> +     * (a) builtin has been explicitly turned off (e.g., ucode=no-builtin)
>> +     * (b) a microcode module has been specified or a scan is successful
>> +     */
>> +    if ( !ucode_builtin || ucode_mod.mod_end || ucode_blob.size )
>> +        return;
>> +
>> +    /* Set ucode_start/_end to the proper blob */
>> +    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
>> +        ucode_blob.size = (size_t)(__builtin_amd_ucode_end
>> +                                   - __builtin_amd_ucode_start);
>> +    else if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
>> +        ucode_blob.size = (size_t)(__builtin_intel_ucode_end
>> +                                   - __builtin_intel_ucode_start);
>> +    else
>> +        return;
>> +
>> +    if ( !ucode_blob.size )
>> +    {
>> +        printk("No builtin ucode! 'ucode=builtin' is nullified.\n");
>> +        return;
>> +    }
>> +    else if ( ucode_blob.size > MAX_EARLY_CPIO_MICROCODE )
> 
> With the "return" above please omit the "else" here. But why this
> restriction, and ...
> 

Will adjust in v2.

>> +    {
>> +        printk("Builtin microcode payload too big! (%ld, we can do %d)\n",
>> +               ucode_blob.size, MAX_EARLY_CPIO_MICROCODE);
>> +        ucode_blob.size = 0;
>> +        return;
>> +    }
>> +
>> +    ucode_blob.data = xmalloc_bytes(ucode_blob.size);
>> +    if ( !ucode_blob.data )
>> +        return;
>> +
>> +    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
>> +        memcpy(ucode_blob.data, __builtin_amd_ucode_start, ucode_blob.size);
>> +    else
>> +        memcpy(ucode_blob.data, __builtin_intel_ucode_start, ucode_blob.size);
> 
> ... why the copying? Can't you simply point ucode_blob.data at
> __builtin_{amd,intel}_ucode_start?

I am all onboard. See my earlier response to Andrew. I used the same 
logic that already exists for scan (which assumes that ucode_blob.data 
is allocated and should be freed when all CPUs are updated).

Thanks for the comments, Jan. On the earlier discussion, please do let 
me know (and others too) if you are convinced that builtin support for 
microcode is warranted/useful.

Cheers,
Eslam

> 
> Jan
>
Jan Beulich Dec. 11, 2019, 9:47 a.m. UTC | #7
On 10.12.2019 23:40, Eslam Elnikety wrote:
> On 10.12.19 10:21, Jan Beulich wrote:
>> On 09.12.2019 22:49, Eslam Elnikety wrote:
>>> On 09.12.19 16:19, Andrew Cooper wrote:
>>>> On 09/12/2019 08:41, Eslam Elnikety wrote:
>>>>> --- /dev/null
>>>>> +++ b/xen/arch/x86/microcode/Makefile
>>>>> @@ -0,0 +1,40 @@
>>>>> +# Copyright (C) 2019 Amazon.com, Inc. or its affiliates.
>>>>> +# Author: Eslam Elnikety <elnikety@amazon.com>
>>>>> +#
>>>>> +# This program is free software; you can redistribute it and/or modify
>>>>> +# it under the terms of the GNU General Public License as published by
>>>>> +# the Free Software Foundation; either version 2 of the License, or
>>>>> +# (at your option) any later version.
>>>>> +#
>>>>> +# This program is distributed in the hope that it will be useful,
>>>>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>>>> +# GNU General Public License for more details.
>>>>> +
>>>>> +obj-y += builtin_ucode.o
>>>>> +
>>>>> +# Directory holding the microcode updates.
>>>>> +UCODE_DIR=$(patsubst "%",%,$(CONFIG_BUILTIN_UCODE_DIR))
>>>>> +amd-blobs := $(wildcard $(UCODE_DIR)/amd-ucode/*)
>>>>> +intel-blobs := $(wildcard $(UCODE_DIR)/intel-ucode/*)
>>>>
>>>> This is a little dangerous.  I can see why you want to do it like this,
>>>> and I can't provide any obvious suggestions, but if this glob picks up
>>>> anything which isn't a microcode file, it will break the logic to search
>>>> for the right blob.
>>>>
>>>
>>> We can limit the amd-blobs and intel-blob to binaries following the
>>> naming convention AuthenticAMD.bin and GenuineIntel.bin for amd and
>>> intel, respectively. Yet, this would be imposing an unnecessary
>>> restriction on administrators who may want to be innovative with naming
>>> (or want to use the naming microcode_amd_*.bin or FF-MM-SS instead).
>>>
>>> Alternatively, we can introduce CONFIG_BUILTIN_UCODE_INTEL and
>>> CONFIG_BUILTIN_UCODE_AMD. Both default to empty strings. Then, an
>>> administrator can specify exactly the microcodes to include relative to
>>> the CONFIG_BUILTIN_UCODE_DIR. For example:
>>> CONFIG_BUILTIN_UCODE_INTEL="intel-ucode/06-3a-09"
>>> CONFIG_BUILTIN_UCODE_AMD="amd-ucode/microcode_amd_fam15h.bin"
>>
>> This would make the feature even less generic - I already meant to
> 
> I do not follow the point about being less generic. (I hope my example 
> did not give the false impression that CONFIG_BUILTIN_UCODE_{AMD,INTEL} 
> allow for only a single microcode blob for a single signature).

Well, the example indeed has given this impression to me. I'm
having a hard time seeing how, beyond very narrow special cases,
either of the examples could be useful to anyone. Yet I think
examples should be generally useful.

>> ask whether building ucode into binaries is really a useful thing
>> when we already have more flexible ways. I could see this being
>> useful if there was no other way to make ucode available at boot
>> time.
> 
> It is useful in addition to the existing ways to do early microcode 
> updates. First, when operating many hosts, using boot modules (either a 
> distinct microcode module or within an initrd) becomes involved. For 
> instance, tools to update boot entries (e.g., 
> https://linux.die.net/man/8/grubby) do not support adding arbitrary 
> (microcode) modules.

I.e. you suggest to work around tools shortcomings by extending
Xen? Wouldn't the more appropriate way to deal with this be to
make the tools more capable?

> Second, there is often need to couple a Xen build with a minimum 
> microcode patch level. Having the microcode built within the Xen image 
> itself is a streamlined, natural way of achieving that.

Okay, I can accept this as a reason, to some degree at least. Yet
as said elsewhere, I don't think you want then to override a
possible "external" ucode module with the builtin blobs. Instead
the newest of everything that's available should then be loaded.

Jan
Jan Beulich Dec. 11, 2019, 9:54 a.m. UTC | #8
On 11.12.2019 00:18, Eslam Elnikety wrote:
> On 10.12.19 10:37, Jan Beulich wrote:
>> On 09.12.2019 09:41, Eslam Elnikety wrote:
>>> --- a/docs/misc/xen-command-line.pandoc
>>> +++ b/docs/misc/xen-command-line.pandoc
>>> @@ -2113,7 +2113,7 @@ logic applies:
>>>      active by default.
>>>   
>>>   ### ucode (x86)
>>> -> `= List of [ <integer> | scan=<bool>, nmi=<bool> ]`
>>> +> `= List of [ <integer> | scan=<bool> | builtin=<bool>, nmi=<bool> ]`
>>
>> Despite my other question regarding the usefulness of this as a
>> whole a few comments.
>>
>> Do "scan" and "builtin" really need to exclude each other? I.e.
>> don't you mean , instead of | ?
> The useful case here would be specifying ucode=scan,builtin which would 
> translate to fallback onto the builtin microcode if a scan fails. In 
> fact, this is already the case given the implementation in v1. It will 
> be better to clarify this semantic by allowing scan,builtin.
> 
> On that note, I really see the "<integer>" and "scan=<bool>" to be 
> equal. Following the logic earlier we should probably also allow 
> ucode=<integer>,builtin. This translates to fallback to builtin if there 
> are no modules at index <integer>.

Almost - if the builtin one is newer than the separate blob, then
either of the cmdline options you name should still cause the
builtin one to be loaded. IOW you want to honor both options, not
prefer the earlier over a later one.

>>> +	---help---
>>> +	  Include the CPU microcode update in the Xen image itself. With this
>>> +	  support, Xen can update the CPU microcode upon boot using the builtin
>>> +	  microcode, with no need for an additional microcode boot modules.
>>> +
>>> +	  If unsure, say N.
>>> +
>>> +config BUILTIN_UCODE_DIR
>>> +	string
>>> +	default "/lib/firmware"
>>> +	depends on BUILTIN_UCODE
>>
>> ... are two separate options needed at all? Can't this latter one
>> being the empty string just imply the feature to be disabled?
> 
> I can go either way here. To me, two options is clearer.

It's the other way around here, but I'd accept being outvoted.

>>> +    ucode_blob.data = xmalloc_bytes(ucode_blob.size);
>>> +    if ( !ucode_blob.data )
>>> +        return;
>>> +
>>> +    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
>>> +        memcpy(ucode_blob.data, __builtin_amd_ucode_start, ucode_blob.size);
>>> +    else
>>> +        memcpy(ucode_blob.data, __builtin_intel_ucode_start, ucode_blob.size);
>>
>> ... why the copying? Can't you simply point ucode_blob.data at
>> __builtin_{amd,intel}_ucode_start?
> 
> I am all onboard. See my earlier response to Andrew. I used the same 
> logic that already exists for scan (which assumes that ucode_blob.data 
> is allocated and should be freed when all CPUs are updated).

The scan case may be different in that it may not lend itself
to re-using the blob in its original location. If that's not
the reason for the present behavior, then I think we would
want to do away with the unnecessary copying there as well.

Jan
Eslam Elnikety Dec. 12, 2019, 10:13 p.m. UTC | #9
On 11.12.19 10:47, Jan Beulich wrote:
> On 10.12.2019 23:40, Eslam Elnikety wrote:
>> On 10.12.19 10:21, Jan Beulich wrote:
>>> On 09.12.2019 22:49, Eslam Elnikety wrote:
>>>> On 09.12.19 16:19, Andrew Cooper wrote:
>>>>> On 09/12/2019 08:41, Eslam Elnikety wrote:
>>>>>> --- /dev/null
>>>>>> +++ b/xen/arch/x86/microcode/Makefile
>>>>>> @@ -0,0 +1,40 @@
>>>>>> +# Copyright (C) 2019 Amazon.com, Inc. or its affiliates.
>>>>>> +# Author: Eslam Elnikety <elnikety@amazon.com>
>>>>>> +#
>>>>>> +# This program is free software; you can redistribute it and/or modify
>>>>>> +# it under the terms of the GNU General Public License as published by
>>>>>> +# the Free Software Foundation; either version 2 of the License, or
>>>>>> +# (at your option) any later version.
>>>>>> +#
>>>>>> +# This program is distributed in the hope that it will be useful,
>>>>>> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
>>>>>> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>>>>> +# GNU General Public License for more details.
>>>>>> +
>>>>>> +obj-y += builtin_ucode.o
>>>>>> +
>>>>>> +# Directory holding the microcode updates.
>>>>>> +UCODE_DIR=$(patsubst "%",%,$(CONFIG_BUILTIN_UCODE_DIR))
>>>>>> +amd-blobs := $(wildcard $(UCODE_DIR)/amd-ucode/*)
>>>>>> +intel-blobs := $(wildcard $(UCODE_DIR)/intel-ucode/*)
>>>>>
>>>>> This is a little dangerous.  I can see why you want to do it like this,
>>>>> and I can't provide any obvious suggestions, but if this glob picks up
>>>>> anything which isn't a microcode file, it will break the logic to search
>>>>> for the right blob.
>>>>>
>>>>
>>>> We can limit the amd-blobs and intel-blob to binaries following the
>>>> naming convention AuthenticAMD.bin and GenuineIntel.bin for amd and
>>>> intel, respectively. Yet, this would be imposing an unnecessary
>>>> restriction on administrators who may want to be innovative with naming
>>>> (or want to use the naming microcode_amd_*.bin or FF-MM-SS instead).
>>>>
>>>> Alternatively, we can introduce CONFIG_BUILTIN_UCODE_INTEL and
>>>> CONFIG_BUILTIN_UCODE_AMD. Both default to empty strings. Then, an
>>>> administrator can specify exactly the microcodes to include relative to
>>>> the CONFIG_BUILTIN_UCODE_DIR. For example:
>>>> CONFIG_BUILTIN_UCODE_INTEL="intel-ucode/06-3a-09"
>>>> CONFIG_BUILTIN_UCODE_AMD="amd-ucode/microcode_amd_fam15h.bin"
>>>
>>> This would make the feature even less generic - I already meant to
>>
>> I do not follow the point about being less generic. (I hope my example
>> did not give the false impression that CONFIG_BUILTIN_UCODE_{AMD,INTEL}
>> allow for only a single microcode blob for a single signature).
> 
> Well, the example indeed has given this impression to me. I'm
> having a hard time seeing how, beyond very narrow special cases,
> either of the examples could be useful to anyone. Yet I think
> examples should be generally useful.
> 

Andrew's earlier response hinted at "what can we do to avoid picking 
random bits in the builtin microcode?" My response was outlining 
alternatives, and the examples there were not meant to be useful beyond 
explaining those alternatives.

>>> ask whether building ucode into binaries is really a useful thing
>>> when we already have more flexible ways. I could see this being
>>> useful if there was no other way to make ucode available at boot
>>> time.
>>
>> It is useful in addition to the existing ways to do early microcode
>> updates. First, when operating many hosts, using boot modules (either a
>> distinct microcode module or within an initrd) becomes involved. For
>> instance, tools to update boot entries (e.g.,
>> https://linux.die.net/man/8/grubby) do not support adding arbitrary
>> (microcode) modules.
> 
> I.e. you suggest to work around tools shortcomings by extending
> Xen? Wouldn't the more appropriate way to deal with this be to
> make the tools more capable?
> 
>> Second, there is often need to couple a Xen build with a minimum
>> microcode patch level. Having the microcode built within the Xen image
>> itself is a streamlined, natural way of achieving that.
> 
> Okay, I can accept this as a reason, to some degree at least. Yet
> as said elsewhere, I don't think you want then to override a
> possible "external" ucode module with the builtin blobs. Instead
> the newest of everything that's available should then be loaded.

Extending Xen to work around tools shortcomings is absolutely not what I 
have in mind. I should have started with the second reason. Read this 
as: Xen relies on a minimum microcode feature set, and it makes sense to 
couple both in one binary. This coupling just happens to provide an 
added benefit in the face of tools shortcoming.

Thanks,
Eslam
Eslam Elnikety Dec. 12, 2019, 10:17 p.m. UTC | #10
On 11.12.19 10:54, Jan Beulich wrote:
> On 11.12.2019 00:18, Eslam Elnikety wrote:
>> On 10.12.19 10:37, Jan Beulich wrote:
>>> On 09.12.2019 09:41, Eslam Elnikety wrote:
>>>> --- a/docs/misc/xen-command-line.pandoc
>>>> +++ b/docs/misc/xen-command-line.pandoc
>>>> @@ -2113,7 +2113,7 @@ logic applies:
>>>>       active by default.
>>>>    
>>>>    ### ucode (x86)
>>>> -> `= List of [ <integer> | scan=<bool>, nmi=<bool> ]`
>>>> +> `= List of [ <integer> | scan=<bool> | builtin=<bool>, nmi=<bool> ]`
>>>
>>> Despite my other question regarding the usefulness of this as a
>>> whole a few comments.
>>>
>>> Do "scan" and "builtin" really need to exclude each other? I.e.
>>> don't you mean , instead of | ?
>> The useful case here would be specifying ucode=scan,builtin which would
>> translate to fallback onto the builtin microcode if a scan fails. In
>> fact, this is already the case given the implementation in v1. It will
>> be better to clarify this semantic by allowing scan,builtin.
>>
>> On that note, I really see the "<integer>" and "scan=<bool>" to be
>> equal. Following the logic earlier we should probably also allow
>> ucode=<integer>,builtin. This translates to fallback to builtin if there
>> are no modules at index <integer>.
> 
> Almost - if the builtin one is newer than the separate blob, then
> either of the cmdline options you name should still cause the
> builtin one to be loaded. IOW you want to honor both options, not
> prefer the earlier over a later one.
> 

On the "newest of everything": That's not what I intend to propose. The 
microcode provided via a scan (or <integer> for that matter) will always 
override the builtin microcode. The common case would be that the 
microcode provided via a scan (or <integer>) is newer than the builtin. 
Yet, an administrator will have the option, if needed, to use an older 
microcode via a scan (or <integer>) to override a newer builtin microcode.


-- Eslam
Jan Beulich Dec. 13, 2019, 9:58 a.m. UTC | #11
On 12.12.2019 23:17, Eslam Elnikety wrote:
> On the "newest of everything": That's not what I intend to propose. The 
> microcode provided via a scan (or <integer> for that matter) will always 
> override the builtin microcode. The common case would be that the 
> microcode provided via a scan (or <integer>) is newer than the builtin. 
> Yet, an administrator will have the option, if needed, to use an older 
> microcode via a scan (or <integer>) to override a newer builtin microcode.

That's a fair enough model, but it wants spelling out in the patch
description.

Jan
Andrew Cooper Dec. 13, 2019, 1:40 p.m. UTC | #12
On 09/12/2019 21:49, Eslam Elnikety wrote:
>>> +
>>> +extern const char __builtin_intel_ucode_start[],
>>> __builtin_intel_ucode_end[];
>>> +extern const char __builtin_amd_ucode_start[],
>>> __builtin_amd_ucode_end[];
>>> +#endif
>>> +
>>>   /* By default, ucode loading is done in NMI handler */
>>>   static bool ucode_in_nmi = true;
>>>   @@ -110,9 +118,9 @@ void __init microcode_set_module(unsigned int
>>> idx)
>>>   }
>>>     /*
>>> - * The format is '[<integer>|scan=<bool>, nmi=<bool>]'. Both
>>> options are
>>> - * optional. If the EFI has forced which of the multiboot payloads
>>> is to be
>>> - * used, only nmi=<bool> is parsed.
>>> + * The format is '[<integer>|scan=<bool>|builtin=<bool>,
>>> nmi=<bool>]'. All
>>> + * options are optional. If the EFI has forced which of the
>>> multiboot payloads
>>> + * is to be used, only nmi=<bool> is parsed.
>>>    */
>>
>> Please delete this, or I'll do a prereq patch to fix it and the command
>> line docs.  (Both are in a poor state.)
>>
>
> Unless you are planning that along your on-going
> docs/hypervisor-guide/microcode-loading.rst effort, I can pick up this
> clean-up/prereq patch myself. What do you have in mind? (Or point me
> to a good example and I will figure things out).

c/s 3c5552954, 53a84f672, 633a40947 or 3136dee9c are good examples. 
ucode= is definitely more complicated to explain because of its implicit
EFI behaviour.

>
>>> +    else if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
>>> +        ucode_blob.size = (size_t)(__builtin_intel_ucode_end
>>> +                                   - __builtin_intel_ucode_start);
>>> +    else
>>> +        return;
>>> +
>>> +    if ( !ucode_blob.size )
>>> +    {
>>> +        printk("No builtin ucode! 'ucode=builtin' is nullified.\n");
>>> +        return;
>>> +    }
>>> +    else if ( ucode_blob.size > MAX_EARLY_CPIO_MICROCODE )
>>> +    {
>>> +        printk("Builtin microcode payload too big! (%ld, we can do
>>> %d)\n",
>>> +               ucode_blob.size, MAX_EARLY_CPIO_MICROCODE);
>>> +        ucode_blob.size = 0;
>>> +        return;
>>> +    }
>>> +
>>> +    ucode_blob.data = xmalloc_bytes(ucode_blob.size);
>>> +    if ( !ucode_blob.data )
>>> +        return;
>>
>> Any chance we can reuse the "fits" logic to avoid holding every
>> inapplicable blob in memory as well?
>>
>
> I think this would be a welcomed change. It seems to me that we have
> two ways to go about it.
>
> 1) We factor the code in the intel-/amd-specific cpu_request_microcode
> to extract logic for finding a match into its own new function, expose
> that through microcode_ops, and finally do xalloc only for the
> matching microcode when early loading is scan or builtin.
>
> 2) Cannot we just do away completely with xalloc? I see that each
> individual microcode update gets allocated anyway in
> microcode_intel.c/get_next_ucode_from_buffer() and in
> microcode_amd.c/cpu_request_microcode(). Unless I am missing
> something, the xmalloc_bytes for ucode_blob.data is redundant.
>
> Thoughts?

I'm certain the code is more complicated than it needs to be. 
Cleanup/simplification would be very welcome.  And if you're up for
that, there is a related area which would be a great improvement.

At the moment, BSP microcode loading is very late because it depends on
this xmalloc() to begin with.  However, no memory allocation is needed
to load microcode from a multiboot module or from the initrd, or from
this future builtin location - all loading can be done from a
directmap/bootmap pointer if needs be.

This would allow moving the BSP microcode to much earlier on boot,
probably somewhere between console setup and E820 handling.

One way or another, the microcode cache which persists past boot has to
be xmalloc()'d, because we will free the module/initrd/builtin.  It
would however be more friendly to AP's to only give them the single
correct piece of ucode, rather than everything to scan through.

(These behaviours and expectations are going to be a chunk of my
intended second microcode.rst doc, including a "be aware that machines
exist which do $X" section to cover some of the weirder corner cases we
have encountered.)

>
>>> +
>>> +builtin_ucode.o: Makefile $(amd-blobs) $(intel-blobs)
>>> +    # Create AMD microcode blob if there are AMD updates on the
>>> build system
>>> +    if [ ! -z "$(amd-blobs)" ]; then \
>>> +        cat $(amd-blobs) > $@.bin ; \
>>> +        $(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64
>>> --rename-section
>>> .data=.builtin_amd_ucode,alloc,load,readonly,data,contents $@.bin
>>> $@.amd; \
>>> +        rm -f $@.bin; \
>>> +    fi
>>> +    # Create INTEL microcode blob if there are INTEL updates on the
>>> build system
>>> +    if [ ! -z "$(intel-blobs)" ]; then \
>>> +        cat $(intel-blobs) > $@.bin; \
>>> +        $(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64
>>> --rename-section
>>> .data=.builtin_intel_ucode,alloc,load,readonly,data,contents $@.bin
>>> $@.intel; \
>>> +        rm -f $@.bin; \
>>> +    fi
>>> +    # Create fake builtin_ucode.o if no updates were present.
>>> Otherwise, builtin_ucode.o carries the available updates
>>> +    if [ -z "$(amd-blobs)" -a -z "$(intel-blobs)" ]; then \
>>> +        $(CC) $(CFLAGS) -c -x c /dev/null -o $@; \
>>> +    else \
>>> +        $(LD) $(LDFLAGS) -r -o $@ $@.*; \
>>> +        rm -f $@.*; \
>>> +    fi
>>
>> How about using weak symbols, rather than playing games like this?
>
> Just to make sure we are on the same page. You are after a dummy
> binary with weak symbols that eventually get overridden when I link
> the actual microcode binaries into builtin_ucode.o? If so, possible of
> course. Except that I do not particularly see the downside of the
> existing approach with dummy builtin_ucode.o.

Actually, you don't even need week symbols.  Size being 0 means that no
blob was inserted.

There doesn't appear to be a need to organise a dummy builtin_ucode.o,
or to manually merge Intel/AMD together.  Simply make obj-y +=
ucode-$VENDOR.o dependent on there being some blob to insert.

~Andrew
Andrew Cooper Dec. 13, 2019, 1:57 p.m. UTC | #13
On 12/12/2019 22:13, Eslam Elnikety wrote:
>>> Second, there is often need to couple a Xen build with a minimum
>>> microcode patch level. Having the microcode built within the Xen image
>>> itself is a streamlined, natural way of achieving that.
>>
>> Okay, I can accept this as a reason, to some degree at least. Yet
>> as said elsewhere, I don't think you want then to override a
>> possible "external" ucode module with the builtin blobs. Instead
>> the newest of everything that's available should then be loaded.
>
> Extending Xen to work around tools shortcomings is absolutely not what
> I have in mind. I should have started with the second reason. Read
> this as: Xen relies on a minimum microcode feature set, and it makes
> sense to couple both in one binary. This coupling just happens to
> provide an added benefit in the face of tools shortcoming.

Do we have anything which strictly relies on a minimum version?

I can definitely see the value of bundling the ucode and saying "this is
the minimum we will tolerate" from a supportability point of view.

There is also value when it comes to easier SRTM/DRTM measurements of
the system in question, including cases where Xen sits on a boot ROM
rather than on disk.

~Andrew
Tamas K Lengyel Dec. 13, 2019, 8:15 p.m. UTC | #14
> There is also value when it comes to easier SRTM/DRTM measurements of
> the system in question, including cases where Xen sits on a boot ROM
> rather than on disk.

We've explored that in the past - building things into Xen and Linux
statically - and ultimately it only works if the command line passed
to Xen also gets measured, otherwise you can always override any
built-in component. So for example with OpenXT on UEFI the entire Xen
config file gets measured. For DRTM I don't think it makes much
difference, I believe the active microcode info is already part of the
measurement, so having it measured as part of the Xen blob doesn't add
anything.

Tamas
Andrew Cooper Dec. 14, 2019, 12:37 a.m. UTC | #15
On 13/12/2019 20:15, Tamas K Lengyel wrote:
>> There is also value when it comes to easier SRTM/DRTM measurements of
>> the system in question, including cases where Xen sits on a boot ROM
>> rather than on disk.
> We've explored that in the past - building things into Xen and Linux
> statically - and ultimately it only works if the command line passed
> to Xen also gets measured, otherwise you can always override any
> built-in component. So for example with OpenXT on UEFI the entire Xen
> config file gets measured.

What I meant was "its one fewer random knobble which needs handling
specially".

> For DRTM I don't think it makes much
> difference, I believe the active microcode info is already part of the
> measurement, so having it measured as part of the Xen blob doesn't add
> anything.

I couldn't possibly comment on timelines, but if I could, the answer
might be "not for a little while yet".

For now, microcode is very definitely software's problem to include in
measurements.

~Andrew
Tamas K Lengyel Dec. 15, 2019, 4:10 p.m. UTC | #16
> > For DRTM I don't think it makes much
> > difference, I believe the active microcode info is already part of the
> > measurement, so having it measured as part of the Xen blob doesn't add
> > anything.
>
> I couldn't possibly comment on timelines, but if I could, the answer
> might be "not for a little while yet".
>
> For now, microcode is very definitely software's problem to include in
> measurements.

Ah right, I was mixing it up with the ACM blob that gets measured there.

Tamas
Eslam Elnikety Dec. 17, 2019, 10:41 p.m. UTC | #17
On 13.12.19 14:57, Andrew Cooper wrote:
> On 12/12/2019 22:13, Eslam Elnikety wrote:
>>>> Second, there is often need to couple a Xen build with a minimum
>>>> microcode patch level. Having the microcode built within the Xen image
>>>> itself is a streamlined, natural way of achieving that.
>>>
>>> Okay, I can accept this as a reason, to some degree at least. Yet
>>> as said elsewhere, I don't think you want then to override a
>>> possible "external" ucode module with the builtin blobs. Instead
>>> the newest of everything that's available should then be loaded.
>>
>> Extending Xen to work around tools shortcomings is absolutely not what
>> I have in mind. I should have started with the second reason. Read
>> this as: Xen relies on a minimum microcode feature set, and it makes
>> sense to couple both in one binary. This coupling just happens to
>> provide an added benefit in the face of tools shortcoming.
> 
> Do we have anything which strictly relies on a minimum version?

I had in mind microcode speculation mitigation features when reasoning 
with the minimum patch level argument.

-- Eslam
Andrew Cooper Dec. 17, 2019, 10:44 p.m. UTC | #18
On 17/12/2019 22:41, Eslam Elnikety wrote:
> On 13.12.19 14:57, Andrew Cooper wrote:
>> On 12/12/2019 22:13, Eslam Elnikety wrote:
>>>>> Second, there is often need to couple a Xen build with a minimum
>>>>> microcode patch level. Having the microcode built within the Xen
>>>>> image
>>>>> itself is a streamlined, natural way of achieving that.
>>>>
>>>> Okay, I can accept this as a reason, to some degree at least. Yet
>>>> as said elsewhere, I don't think you want then to override a
>>>> possible "external" ucode module with the builtin blobs. Instead
>>>> the newest of everything that's available should then be loaded.
>>>
>>> Extending Xen to work around tools shortcomings is absolutely not what
>>> I have in mind. I should have started with the second reason. Read
>>> this as: Xen relies on a minimum microcode feature set, and it makes
>>> sense to couple both in one binary. This coupling just happens to
>>> provide an added benefit in the face of tools shortcoming.
>>
>> Do we have anything which strictly relies on a minimum version?
>
> I had in mind microcode speculation mitigation features when reasoning
> with the minimum patch level argument.

Considering how well the first round of speculative microcode went,
mandating it would have been a rather bad thing...

But yes - as a usecase of "I wish to bundle the minimum microcode I'd
like to work with", this seems entirely reasonable.

~Andrew
Eslam Elnikety Dec. 18, 2019, 12:29 a.m. UTC | #19
On 13.12.19 14:40, Andrew Cooper wrote:
> On 09/12/2019 21:49, Eslam Elnikety wrote:
>>>> +
>>>> +extern const char __builtin_intel_ucode_start[],
>>>> __builtin_intel_ucode_end[];
>>>> +extern const char __builtin_amd_ucode_start[],
>>>> __builtin_amd_ucode_end[];
>>>> +#endif
>>>> +
>>>>    /* By default, ucode loading is done in NMI handler */
>>>>    static bool ucode_in_nmi = true;
>>>>    @@ -110,9 +118,9 @@ void __init microcode_set_module(unsigned int
>>>> idx)
>>>>    }
>>>>      /*
>>>> - * The format is '[<integer>|scan=<bool>, nmi=<bool>]'. Both
>>>> options are
>>>> - * optional. If the EFI has forced which of the multiboot payloads
>>>> is to be
>>>> - * used, only nmi=<bool> is parsed.
>>>> + * The format is '[<integer>|scan=<bool>|builtin=<bool>,
>>>> nmi=<bool>]'. All
>>>> + * options are optional. If the EFI has forced which of the
>>>> multiboot payloads
>>>> + * is to be used, only nmi=<bool> is parsed.
>>>>     */
>>>
>>> Please delete this, or I'll do a prereq patch to fix it and the command
>>> line docs.  (Both are in a poor state.)
>>>
>>
>> Unless you are planning that along your on-going
>> docs/hypervisor-guide/microcode-loading.rst effort, I can pick up this
>> clean-up/prereq patch myself. What do you have in mind? (Or point me
>> to a good example and I will figure things out).
> 
> c/s 3c5552954, 53a84f672, 633a40947 or 3136dee9c are good examples.
> ucode= is definitely more complicated to explain because of its implicit
> EFI behaviour.
> 

Currently massaging a patch to that effect.

>>>> +    else if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
>>>> +        ucode_blob.size = (size_t)(__builtin_intel_ucode_end
>>>> +                                   - __builtin_intel_ucode_start);
>>>> +    else
>>>> +        return;
>>>> +
>>>> +    if ( !ucode_blob.size )
>>>> +    {
>>>> +        printk("No builtin ucode! 'ucode=builtin' is nullified.\n");
>>>> +        return;
>>>> +    }
>>>> +    else if ( ucode_blob.size > MAX_EARLY_CPIO_MICROCODE )
>>>> +    {
>>>> +        printk("Builtin microcode payload too big! (%ld, we can do
>>>> %d)\n",
>>>> +               ucode_blob.size, MAX_EARLY_CPIO_MICROCODE);
>>>> +        ucode_blob.size = 0;
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    ucode_blob.data = xmalloc_bytes(ucode_blob.size);
>>>> +    if ( !ucode_blob.data )
>>>> +        return;
>>>
>>> Any chance we can reuse the "fits" logic to avoid holding every
>>> inapplicable blob in memory as well?
>>>
>>
>> I think this would be a welcomed change. It seems to me that we have
>> two ways to go about it.
>>
>> 1) We factor the code in the intel-/amd-specific cpu_request_microcode
>> to extract logic for finding a match into its own new function, expose
>> that through microcode_ops, and finally do xalloc only for the
>> matching microcode when early loading is scan or builtin.
>>
>> 2) Cannot we just do away completely with xalloc? I see that each
>> individual microcode update gets allocated anyway in
>> microcode_intel.c/get_next_ucode_from_buffer() and in
>> microcode_amd.c/cpu_request_microcode(). Unless I am missing
>> something, the xmalloc_bytes for ucode_blob.data is redundant.
>>
>> Thoughts?
> 
> I'm certain the code is more complicated than it needs to be.
> Cleanup/simplification would be very welcome.  And if you're up for
> that, there is a related area which would be a great improvement.
> 
> At the moment, BSP microcode loading is very late because it depends on
> this xmalloc() to begin with.  However, no memory allocation is needed
> to load microcode from a multiboot module or from the initrd, or from
> this future builtin location - all loading can be done from a
> directmap/bootmap pointer if needs be.
> 
> This would allow moving the BSP microcode to much earlier on boot,
> probably somewhere between console setup and E820 handling.
> 
> One way or another, the microcode cache which persists past boot has to
> be xmalloc()'d, because we will free the module/initrd/builtin.  It
> would however be more friendly to AP's to only give them the single
> correct piece of ucode, rather than everything to scan through.
> 
> (These behaviours and expectations are going to be a chunk of my
> intended second microcode.rst doc, including a "be aware that machines
> exist which do $X" section to cover some of the weirder corner cases we
> have encountered.)
> 

Avoiding the xmalloc/memcpy on the scan for microcode is one of the 
patches that I will share shortly. In particular, the ucode_blob.data 
would directly point to the buffer matching the canonical name within 
the cpio name space.

We are still a bit away from pushing the BSP microcode update earlier 
though. We will need to surgically remove all the unnecessary 
xmalloc/memcpy from within microcode_{amd,intel}.c. Also, as you hinted, 
the challenging bit is the per-cpu microcode cache.

>>>> +
>>>> +builtin_ucode.o: Makefile $(amd-blobs) $(intel-blobs)
>>>> +    # Create AMD microcode blob if there are AMD updates on the
>>>> build system
>>>> +    if [ ! -z "$(amd-blobs)" ]; then \
>>>> +        cat $(amd-blobs) > $@.bin ; \
>>>> +        $(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64
>>>> --rename-section
>>>> .data=.builtin_amd_ucode,alloc,load,readonly,data,contents $@.bin
>>>> $@.amd; \
>>>> +        rm -f $@.bin; \
>>>> +    fi
>>>> +    # Create INTEL microcode blob if there are INTEL updates on the
>>>> build system
>>>> +    if [ ! -z "$(intel-blobs)" ]; then \
>>>> +        cat $(intel-blobs) > $@.bin; \
>>>> +        $(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64
>>>> --rename-section
>>>> .data=.builtin_intel_ucode,alloc,load,readonly,data,contents $@.bin
>>>> $@.intel; \
>>>> +        rm -f $@.bin; \
>>>> +    fi
>>>> +    # Create fake builtin_ucode.o if no updates were present.
>>>> Otherwise, builtin_ucode.o carries the available updates
>>>> +    if [ -z "$(amd-blobs)" -a -z "$(intel-blobs)" ]; then \
>>>> +        $(CC) $(CFLAGS) -c -x c /dev/null -o $@; \
>>>> +    else \
>>>> +        $(LD) $(LDFLAGS) -r -o $@ $@.*; \
>>>> +        rm -f $@.*; \
>>>> +    fi
>>>
>>> How about using weak symbols, rather than playing games like this?
>>
>> Just to make sure we are on the same page. You are after a dummy
>> binary with weak symbols that eventually get overridden when I link
>> the actual microcode binaries into builtin_ucode.o? If so, possible of
>> course. Except that I do not particularly see the downside of the
>> existing approach with dummy builtin_ucode.o.
> 
> Actually, you don't even need week symbols.  Size being 0 means that no
> blob was inserted.
> 
> There doesn't appear to be a need to organise a dummy builtin_ucode.o,
> or to manually merge Intel/AMD together.  Simply make obj-y +=
> ucode-$VENDOR.o dependent on there being some blob to insert.

I have reworked this part in v2 such that the configurations specify 
explicitly the individual microcode blobs to include. I have also 
adopted the "obj-y += ucode-$VENDOR.o" and made it dependent on the 
corresponding blobs being available. That said, I was not able to get 
rid of the dummy object. The dummy is still needed in case no amd nor 
intel ucode blobs were specified. In case of no microcode blobs, obj-y 
will not refer to any dependency within xen/arch/x86/microcode/ and 
there will be no rule to generate microcode/built_in.o (which is 
required for all subdir in xen/arch/x86/). Of course, we can do logic in 
xen/arch/x86/Makefile to mark microcode as a subdir iff there are 
microcode blobs available, but it seems to me that this logic does not 
belong there. Also, my initial attempt at this quickly proved that the 
dummy approach is way simpler.

-- Eslam

> 
> ~Andrew
>
diff mbox series

Patch

diff --git a/docs/misc/builtin-ucode.txt b/docs/misc/builtin-ucode.txt
new file mode 100644
index 0000000000..43bb60d3eb
--- /dev/null
+++ b/docs/misc/builtin-ucode.txt
@@ -0,0 +1,60 @@ 
+-------------------------------------------------
+Builtin Microcode Support for x86 (AMD and INTEL)
+-------------------------------------------------
+Author:
+  Eslam Elnikety <elnikety@amazon.com>
+Initial version:
+  Dec 2019
+-------------------------------------------------
+
+About:
+------
+* This documentation describes preparing the builtin microcode blobs to use as
+  builtin microcode update within the Xen image itself.
+
+* Support for builtin microcode is limited to x86.
+
+* Builtin support is available via the configurations BUILTIN_UCODE and
+  BUILTIN_UCODE_DIR. The first enables the support (default is off), and the
+  latter directs the build system to where it can find the microcode directory
+  (default is /lib/firmware).
+
+Microcode Directory:
+--------------------
+This directory holds the microcode blobs to be built in the Xen image. There
+are two subdirectories: amd-ucode and intel-ucode for AMD and INTEL,
+respectively.
+
+INTEL microcode blobs typically follow the naming format FF-MM-SS for
+{F}amily-{M}odel-{S}tepping. Alternatively, GenuineIntel.bin bundles a bunch
+of FF-MM-SS blobs into a single binary and the one matching the host CPU gets
+picked when performing the microcode update. For AMD, the canonical name is
+AuthenticAMD.bin. Similarly, such binary can bundle a bunch of microcode blobs
+for different families.
+
+The builtin microcode is generated by concatenating the microcode blobs under
+intel-ucode into GenuineIntel.bin, and those under amd-ucode into
+AuthenticAMD.bin. Those are then copied into the Xen image itself.
+
+Here is an example microcode directory structure, following the convention [1]:
+
+/lib/firmware
+|-- amd-ucode
+...
+|   |-- microcode_amd_fam15h.bin
+...
+|-- intel-ucode
+...
+|   |-- 06-3a-09
+...
+
+Alternatively, the subdirectories can directly contain GenuineIntel.bin and
+AuthenticAMD.bin (since both are concatenation of the individual microcode
+blobs and the end result is the same).
+
+An empty or non-existant subdirectory (amd-ucode and/or intel-ucode) excludes
+the respective AMD or INTEL microcode from being built in.
+
+Reference(s):
+-------------
+[1] https://www.kernel.org/doc/Documentation/x86/microcode.txt
diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index 891d2d439f..ba25db95da 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -2113,7 +2113,7 @@  logic applies:
    active by default.
 
 ### ucode (x86)
-> `= List of [ <integer> | scan=<bool>, nmi=<bool> ]`
+> `= List of [ <integer> | scan=<bool> | builtin=<bool>, nmi=<bool> ]`
 
 Specify how and where to find CPU microcode update blob.
 
@@ -2128,6 +2128,9 @@  when used with xen.efi (there the concept of modules doesn't exist, and
 the blob gets specified via the `ucode=<filename>` config file/section
 entry; see [EFI configuration file description](efi.html)).
 
+'builtin' instructs the hypervisor to use the builtin microcode update. This
+option is available only if option BUILTIN_UCODE is enabled.
+
 'scan' instructs the hypervisor to scan the multiboot images for an cpio
 image that contains microcode. Depending on the platform the blob with the
 microcode in the cpio name space must be:
diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig
index 02bb05f42e..14c5992d86 100644
--- a/xen/arch/x86/Kconfig
+++ b/xen/arch/x86/Kconfig
@@ -218,6 +218,26 @@  config MEM_SHARING
 	bool "Xen memory sharing support" if EXPERT = "y"
 	depends on HVM
 
+config BUILTIN_UCODE
+	def_bool n
+	prompt "Support for Builtin Microcode"
+	---help---
+	  Include the CPU microcode update in the Xen image itself. With this
+	  support, Xen can update the CPU microcode upon boot using the builtin
+	  microcode, with no need for an additional microcode boot modules.
+
+	  If unsure, say N.
+
+config BUILTIN_UCODE_DIR
+	string
+	default "/lib/firmware"
+	depends on BUILTIN_UCODE
+	---help---
+	  The directory containing the microcode blobs.
+
+	  See docs/misc/builtin-ucode.txt for how such directory should be
+	  structured to hold AMD and INTEL microcode.
+
 endmenu
 
 source "common/Kconfig"
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 7da5a2631e..8ac93a15a7 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -7,6 +7,7 @@  subdir-y += mm
 subdir-$(CONFIG_XENOPROF) += oprofile
 subdir-$(CONFIG_PV) += pv
 subdir-y += x86_64
+subdir-$(CONFIG_BUILTIN_UCODE) += microcode
 
 alternative-y := alternative.init.o
 alternative-$(CONFIG_LIVEPATCH) :=
diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
index 6ced293d88..7afbe44286 100644
--- a/xen/arch/x86/microcode.c
+++ b/xen/arch/x86/microcode.c
@@ -97,6 +97,14 @@  static struct ucode_mod_blob __initdata ucode_blob;
  */
 static bool_t __initdata ucode_scan;
 
+#ifdef CONFIG_BUILTIN_UCODE
+/* builtin is the default when BUILTIN_UCODE is set */
+static bool_t __initdata ucode_builtin = 1;
+
+extern const char __builtin_intel_ucode_start[], __builtin_intel_ucode_end[];
+extern const char __builtin_amd_ucode_start[], __builtin_amd_ucode_end[];
+#endif
+
 /* By default, ucode loading is done in NMI handler */
 static bool ucode_in_nmi = true;
 
@@ -110,9 +118,9 @@  void __init microcode_set_module(unsigned int idx)
 }
 
 /*
- * The format is '[<integer>|scan=<bool>, nmi=<bool>]'. Both options are
- * optional. If the EFI has forced which of the multiboot payloads is to be
- * used, only nmi=<bool> is parsed.
+ * The format is '[<integer>|scan=<bool>|builtin=<bool>, nmi=<bool>]'. All
+ * options are optional. If the EFI has forced which of the multiboot payloads
+ * is to be used, only nmi=<bool> is parsed.
  */
 static int __init parse_ucode(const char *s)
 {
@@ -130,6 +138,10 @@  static int __init parse_ucode(const char *s)
         {
             if ( (val = parse_boolean("scan", s, ss)) >= 0 )
                 ucode_scan = val;
+#ifdef CONFIG_BUILTIN_UCODE
+	    else if ( (val = parse_boolean("builtin", s, ss)) >= 0 )
+                ucode_builtin = val;
+#endif
             else
             {
                 const char *q;
@@ -237,6 +249,48 @@  void __init microcode_grab_module(
 scan:
     if ( ucode_scan )
         microcode_scan_module(module_map, mbi);
+
+#ifdef CONFIG_BUILTIN_UCODE
+    /*
+     * Do not use the builtin microcode if:
+     * (a) builtin has been explicitly turned off (e.g., ucode=no-builtin)
+     * (b) a microcode module has been specified or a scan is successful
+     */
+    if ( !ucode_builtin || ucode_mod.mod_end || ucode_blob.size )
+        return;
+
+    /* Set ucode_start/_end to the proper blob */
+    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+        ucode_blob.size = (size_t)(__builtin_amd_ucode_end
+                                   - __builtin_amd_ucode_start);
+    else if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+        ucode_blob.size = (size_t)(__builtin_intel_ucode_end
+                                   - __builtin_intel_ucode_start);
+    else
+        return;
+
+    if ( !ucode_blob.size )
+    {
+        printk("No builtin ucode! 'ucode=builtin' is nullified.\n");
+        return;
+    }
+    else if ( ucode_blob.size > MAX_EARLY_CPIO_MICROCODE )
+    {
+        printk("Builtin microcode payload too big! (%ld, we can do %d)\n",
+               ucode_blob.size, MAX_EARLY_CPIO_MICROCODE);
+        ucode_blob.size = 0;
+        return;
+    }
+
+    ucode_blob.data = xmalloc_bytes(ucode_blob.size);
+    if ( !ucode_blob.data )
+        return;
+
+    if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+        memcpy(ucode_blob.data, __builtin_amd_ucode_start, ucode_blob.size);
+    else
+        memcpy(ucode_blob.data, __builtin_intel_ucode_start, ucode_blob.size);
+#endif
 }
 
 const struct microcode_ops *microcode_ops;
diff --git a/xen/arch/x86/microcode/Makefile b/xen/arch/x86/microcode/Makefile
new file mode 100644
index 0000000000..6d585c5482
--- /dev/null
+++ b/xen/arch/x86/microcode/Makefile
@@ -0,0 +1,40 @@ 
+# Copyright (C) 2019 Amazon.com, Inc. or its affiliates.
+# Author: Eslam Elnikety <elnikety@amazon.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+obj-y += builtin_ucode.o
+
+# Directory holding the microcode updates.
+UCODE_DIR=$(patsubst "%",%,$(CONFIG_BUILTIN_UCODE_DIR))
+amd-blobs := $(wildcard $(UCODE_DIR)/amd-ucode/*)
+intel-blobs := $(wildcard $(UCODE_DIR)/intel-ucode/*)
+
+builtin_ucode.o: Makefile $(amd-blobs) $(intel-blobs)
+	# Create AMD microcode blob if there are AMD updates on the build system
+	if [ ! -z "$(amd-blobs)" ]; then \
+		cat $(amd-blobs) > $@.bin ; \
+		$(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64 --rename-section .data=.builtin_amd_ucode,alloc,load,readonly,data,contents $@.bin $@.amd; \
+		rm -f $@.bin; \
+	fi
+	# Create INTEL microcode blob if there are INTEL updates on the build system
+	if [ ! -z "$(intel-blobs)" ]; then \
+		cat $(intel-blobs) > $@.bin; \
+		$(OBJCOPY) -I binary -O elf64-x86-64 -B i386:x86-64 --rename-section .data=.builtin_intel_ucode,alloc,load,readonly,data,contents $@.bin $@.intel; \
+		rm -f $@.bin; \
+	fi
+	# Create fake builtin_ucode.o if no updates were present. Otherwise, builtin_ucode.o carries the available updates
+	if [ -z "$(amd-blobs)" -a -z "$(intel-blobs)" ]; then \
+		$(CC) $(CFLAGS) -c -x c /dev/null -o $@; \
+	else \
+		$(LD) $(LDFLAGS) -r -o $@ $@.*; \
+		rm -f $@.*; \
+	fi
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 111edb5360..7a4c58c246 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -265,6 +265,18 @@  SECTIONS
        *(SORT(.data.vpci.*))
        __end_vpci_array = .;
 #endif
+
+#if defined(CONFIG_BUILTIN_UCODE)
+       . = ALIGN(POINTER_ALIGN);
+       __builtin_amd_ucode_start = .;
+       *(.builtin_amd_ucode)
+       __builtin_amd_ucode_end = .;
+
+       . = ALIGN(POINTER_ALIGN);
+       __builtin_intel_ucode_start = .;
+       *(.builtin_intel_ucode)
+       __builtin_intel_ucode_end = .;
+#endif
   } :text
 
   . = ALIGN(SECTION_ALIGN);