diff mbox series

[v5,07/12] x86/cpu/keylocker: Load an internal wrapping key at boot-time

Message ID 20220112211258.21115-8-chang.seok.bae@intel.com (mailing list archive)
State Not Applicable, archived
Delegated to: Mike Snitzer
Headers show
Series x86: Support Key Locker | expand

Commit Message

Chang S. Bae Jan. 12, 2022, 9:12 p.m. UTC
The Internal Wrapping Key (IWKey) is an entity of Key Locker to encode a
clear text key into a key handle. This key is a pivot in protecting user
keys. So the value has to be randomized before being loaded in the
software-invisible CPU state.

IWKey needs to be established before the first user. Given that the only
proposed Linux use case for Key Locker is dm-crypt, the feature could be
lazily enabled when the first dm-crypt user arrives, but there is no
precedent for late enabling of CPU features and it adds maintenance burden
without demonstrative benefit outside of minimizing the visibility of
Key Locker to userspace.

The kernel generates random bytes and load them at boot time. These bytes
are flushed out immediately.

Setting the CR4.KL bit does not always enable the feature so ensure the
dynamic CPU bit (CPUID.AESKLE) is set before loading the key.

Given that the Linux Key Locker support is only intended for bare metal
dm-crypt consumption, and that switching IWKey per VM is untenable,
explicitly skip Key Locker setup in the X86_FEATURE_HYPERVISOR case.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
---
Changes from RFC v2:
* Make bare metal only.
* Clean up the code (e.g. dynamically allocate the key cache).
  (Dan Williams)
* Massage the changelog.
* Move out the LOADIWKEY wrapper and the Key Locker CPUID defines.

Note, Dan wonders that given that the only proposed Linux use case for
Key Locker is dm-crypt, the feature could be lazily enabled when the
first dm-crypt user arrives, but as Dave notes there is no precedent
for late enabling of CPU features and it adds maintenance burden
without demonstrative benefit outside of minimizing the visibility of
Key Locker to userspace.
---
 arch/x86/include/asm/keylocker.h |  9 ++++
 arch/x86/kernel/Makefile         |  1 +
 arch/x86/kernel/cpu/common.c     |  5 +-
 arch/x86/kernel/keylocker.c      | 79 ++++++++++++++++++++++++++++++++
 arch/x86/kernel/smpboot.c        |  2 +
 5 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/keylocker.c

Comments

Evan Green Aug. 23, 2022, 3:49 p.m. UTC | #1
On Wed, Jan 12, 2022 at 1:21 PM Chang S. Bae <chang.seok.bae@intel.com> wrote:
>
> The Internal Wrapping Key (IWKey) is an entity of Key Locker to encode a
> clear text key into a key handle. This key is a pivot in protecting user
> keys. So the value has to be randomized before being loaded in the
> software-invisible CPU state.
>
> IWKey needs to be established before the first user. Given that the only
> proposed Linux use case for Key Locker is dm-crypt, the feature could be
> lazily enabled when the first dm-crypt user arrives, but there is no
> precedent for late enabling of CPU features and it adds maintenance burden
> without demonstrative benefit outside of minimizing the visibility of
> Key Locker to userspace.
>
> The kernel generates random bytes and load them at boot time. These bytes
> are flushed out immediately.
>
> Setting the CR4.KL bit does not always enable the feature so ensure the
> dynamic CPU bit (CPUID.AESKLE) is set before loading the key.
>
> Given that the Linux Key Locker support is only intended for bare metal
> dm-crypt consumption, and that switching IWKey per VM is untenable,
> explicitly skip Key Locker setup in the X86_FEATURE_HYPERVISOR case.
>
> Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
> Reviewed-by: Dan Williams <dan.j.williams@intel.com>
> Cc: x86@kernel.org
> Cc: linux-kernel@vger.kernel.org
> ---
> Changes from RFC v2:
> * Make bare metal only.
> * Clean up the code (e.g. dynamically allocate the key cache).
>   (Dan Williams)
> * Massage the changelog.
> * Move out the LOADIWKEY wrapper and the Key Locker CPUID defines.
>
> Note, Dan wonders that given that the only proposed Linux use case for
> Key Locker is dm-crypt, the feature could be lazily enabled when the
> first dm-crypt user arrives, but as Dave notes there is no precedent
> for late enabling of CPU features and it adds maintenance burden
> without demonstrative benefit outside of minimizing the visibility of
> Key Locker to userspace.
> ---
>  arch/x86/include/asm/keylocker.h |  9 ++++
>  arch/x86/kernel/Makefile         |  1 +
>  arch/x86/kernel/cpu/common.c     |  5 +-
>  arch/x86/kernel/keylocker.c      | 79 ++++++++++++++++++++++++++++++++
>  arch/x86/kernel/smpboot.c        |  2 +
>  5 files changed, 95 insertions(+), 1 deletion(-)
>  create mode 100644 arch/x86/kernel/keylocker.c
>
> diff --git a/arch/x86/include/asm/keylocker.h b/arch/x86/include/asm/keylocker.h
> index e85dfb6c1524..820ac29c06d9 100644
> --- a/arch/x86/include/asm/keylocker.h
> +++ b/arch/x86/include/asm/keylocker.h
> @@ -5,6 +5,7 @@
>
>  #ifndef __ASSEMBLY__
>
> +#include <asm/processor.h>
>  #include <linux/bits.h>
>  #include <asm/fpu/types.h>
>
> @@ -28,5 +29,13 @@ struct iwkey {
>  #define KEYLOCKER_CPUID_EBX_WIDE       BIT(2)
>  #define KEYLOCKER_CPUID_EBX_BACKUP     BIT(4)
>
> +#ifdef CONFIG_X86_KEYLOCKER
> +void setup_keylocker(struct cpuinfo_x86 *c);
> +void destroy_keylocker_data(void);
> +#else
> +#define setup_keylocker(c) do { } while (0)
> +#define destroy_keylocker_data() do { } while (0)
> +#endif
> +
>  #endif /*__ASSEMBLY__ */
>  #endif /* _ASM_KEYLOCKER_H */
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index 2ff3e600f426..e15efa238497 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -144,6 +144,7 @@ obj-$(CONFIG_PERF_EVENTS)           += perf_regs.o
>  obj-$(CONFIG_TRACING)                  += tracepoint.o
>  obj-$(CONFIG_SCHED_MC_PRIO)            += itmt.o
>  obj-$(CONFIG_X86_UMIP)                 += umip.o
> +obj-$(CONFIG_X86_KEYLOCKER)            += keylocker.o
>
>  obj-$(CONFIG_UNWINDER_ORC)             += unwind_orc.o
>  obj-$(CONFIG_UNWINDER_FRAME_POINTER)   += unwind_frame.o
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index 0083464de5e3..23b4aa437c1e 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -57,6 +57,8 @@
>  #include <asm/microcode_intel.h>
>  #include <asm/intel-family.h>
>  #include <asm/cpu_device_id.h>
> +#include <asm/keylocker.h>
> +
>  #include <asm/uv/uv.h>
>  #include <asm/sigframe.h>
>
> @@ -1595,10 +1597,11 @@ static void identify_cpu(struct cpuinfo_x86 *c)
>         /* Disable the PN if appropriate */
>         squash_the_stupid_serial_number(c);
>
> -       /* Set up SMEP/SMAP/UMIP */
> +       /* Setup various Intel-specific CPU security features */
>         setup_smep(c);
>         setup_smap(c);
>         setup_umip(c);
> +       setup_keylocker(c);
>
>         /* Enable FSGSBASE instructions if available. */
>         if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
> diff --git a/arch/x86/kernel/keylocker.c b/arch/x86/kernel/keylocker.c
> new file mode 100644
> index 000000000000..87d775a65716
> --- /dev/null
> +++ b/arch/x86/kernel/keylocker.c
> @@ -0,0 +1,79 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +/*
> + * Setup Key Locker feature and support internal wrapping key
> + * management.
> + */
> +
> +#include <linux/random.h>
> +#include <linux/poison.h>
> +
> +#include <asm/fpu/api.h>
> +#include <asm/keylocker.h>
> +#include <asm/tlbflush.h>
> +
> +static __initdata struct keylocker_setup_data {
> +       struct iwkey key;
> +} kl_setup;
> +
> +static void __init generate_keylocker_data(void)
> +{
> +       get_random_bytes(&kl_setup.key.integrity_key,  sizeof(kl_setup.key.integrity_key));
> +       get_random_bytes(&kl_setup.key.encryption_key, sizeof(kl_setup.key.encryption_key));
> +}
> +
> +void __init destroy_keylocker_data(void)
> +{
> +       memset(&kl_setup.key, KEY_DESTROY, sizeof(kl_setup.key));
> +}
> +
> +static void __init load_keylocker(void)

I am late to this party by 6 months, but:
load_keylocker() cannot be __init, as it gets called during SMP core onlining.

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Chang S. Bae Aug. 24, 2022, 10:20 p.m. UTC | #2
On 8/23/2022 8:49 AM, Evan Green wrote:
> On Wed, Jan 12, 2022 at 1:21 PM Chang S. Bae <chang.seok.bae@intel.com> wrote:
>>
<snip>
>> +
>> +static void __init load_keylocker(void)
> 
> I am late to this party by 6 months, but:
> load_keylocker() cannot be __init, as it gets called during SMP core onlining.

Yeah, it looks like the case with this patch only.

But the next patch [1] limits the call during boot time only:

	if (c == &boot_cpu_data) {
		...
		load_keylocker();
		...
	} else {
		...
		if (!kl_setup.initialized) {
			load_keylocker();
		} else if (valid_kl) {
			rc = copy_keylocker();
			...
		}
	}

kl_setup.initialized is set by native_smp_cpus_done() -> 
destroy_keylocker_data() when CPUs are booted. Then load_keylocker() is 
not called because the root key (aka IWKey) is no longer available in 
memory.

Now this 'valid_kl' flag should be always on unless the root key backup 
is corrupted. Then copy_keylocker() loads the root key from the backup 
in the platform state.

So I think the onlining CPU won't call it.

Maybe this bit can be much clarified in a separate (new) patch, instead 
of being part of another like [1].

Thanks,
Chang

[1]: 
https://lore.kernel.org/lkml/20220112211258.21115-9-chang.seok.bae@intel.com/

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Evan Green Aug. 24, 2022, 10:52 p.m. UTC | #3
On Wed, Aug 24, 2022 at 3:21 PM Chang S. Bae <chang.seok.bae@intel.com> wrote:
>
> On 8/23/2022 8:49 AM, Evan Green wrote:
> > On Wed, Jan 12, 2022 at 1:21 PM Chang S. Bae <chang.seok.bae@intel.com> wrote:
> >>
> <snip>
> >> +
> >> +static void __init load_keylocker(void)
> >
> > I am late to this party by 6 months, but:
> > load_keylocker() cannot be __init, as it gets called during SMP core onlining.
>
> Yeah, it looks like the case with this patch only.
>
> But the next patch [1] limits the call during boot time only:
>
>         if (c == &boot_cpu_data) {
>                 ...
>                 load_keylocker();
>                 ...
>         } else {
>                 ...
>                 if (!kl_setup.initialized) {
>                         load_keylocker();
>                 } else if (valid_kl) {
>                         rc = copy_keylocker();
>                         ...
>                 }
>         }
>
> kl_setup.initialized is set by native_smp_cpus_done() ->
> destroy_keylocker_data() when CPUs are booted. Then load_keylocker() is
> not called because the root key (aka IWKey) is no longer available in
> memory.
>
> Now this 'valid_kl' flag should be always on unless the root key backup
> is corrupted. Then copy_keylocker() loads the root key from the backup
> in the platform state.
>
> So I think the onlining CPU won't call it.
>
> Maybe this bit can be much clarified in a separate (new) patch, instead
> of being part of another like [1].

Whatever we ended up landing in the ChromeOS tree (which I think was
v4 of this series) actively hit this bug in hibernation, which is how
I found it. I couldn't get a full backtrace because the backtracing
code tripped over itself as well for some reason. If the next patch in
this series is different from what we landed in ChromeOS, then maybe
your description is correct, but I haven't dug in to understand the
delta.

-Evan

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Chang S. Bae Aug. 25, 2022, 1:06 a.m. UTC | #4
On 8/24/2022 3:52 PM, Evan Green wrote:
> 
> Whatever we ended up landing in the ChromeOS tree (which I think was
> v4 of this series) actively hit this bug in hibernation, which is how
> I found it. I couldn't get a full backtrace because the backtracing
> code tripped over itself as well for some reason. If the next patch in
> this series is different from what we landed in ChromeOS, then maybe
> your description is correct, but I haven't dug in to understand the
> delta.

So the change from v4 is simply dropping CBC mode. Marvin who reported 
another issue told me that he pushed the fix to some Chrome repository. 
But I don't know that's the same repo that you mentioned. Are you able 
to locate that tree if possible?

Also, it would be nice to have more detail about that hibernation bug.

Thanks,
Chang

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Evan Green Aug. 25, 2022, 3:31 p.m. UTC | #5
On Wed, Aug 24, 2022 at 6:06 PM Chang S. Bae <chang.seok.bae@intel.com> wrote:
>
> On 8/24/2022 3:52 PM, Evan Green wrote:
> >
> > Whatever we ended up landing in the ChromeOS tree (which I think was
> > v4 of this series) actively hit this bug in hibernation, which is how
> > I found it. I couldn't get a full backtrace because the backtracing
> > code tripped over itself as well for some reason. If the next patch in
> > this series is different from what we landed in ChromeOS, then maybe
> > your description is correct, but I haven't dug in to understand the
> > delta.
>
> So the change from v4 is simply dropping CBC mode. Marvin who reported
> another issue told me that he pushed the fix to some Chrome repository.
> But I don't know that's the same repo that you mentioned. Are you able
> to locate that tree if possible?

I see. The only ChromeOS tree I'm aware of where keylocker has landed
is our 5.10 tree. This is the change where it landed:
https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/3373776/12

>
> Also, it would be nice to have more detail about that hibernation bug.

Here's the log I've got that pointed me down this path:
https://pastebin.com/VvR1EHvE

Relevant bit pasted below:

<6>[43486.263035] Enabling non-boot CPUs ...
<6>[43486.263081] x86: Booting SMP configuration:
<6>[43486.263082] smpboot: Booting Node 0 Processor 1 APIC 0x1
<2>[43486.264010] kernel tried to execute NX-protected page - exploit
attempt? (uid: 0)
<1>[43486.264019] BUG: unable to handle page fault for address: ffffffff94b483a6
<1>[43486.264021] #PF: supervisor instruction fetch in kernel mode
<1>[43486.264023] #PF: error_code(0x0011) - permissions violation
<6>[43486.264025] PGD 391c0e067 P4D 391c0e067 PUD 391c0f063 PMD
10006c063 PTE 8000000392148163
<4>[43486.264031] Oops: 0011 [#1] PREEMPT SMP NOPTI
<4>[43486.264035] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G U
5.10.136-19391-gadfe4d4b8c04 #1
b640352a7a0e5f1522aed724296ad63f90c007df
<4>[43486.264036] Hardware name: Google Primus/Primus, BIOS
Google_Primus.14505.145.0 06/23/2022
<4>[43486.264042] RIP: 0010:load_keylocker+0x0/0x7f
<4>[43486.264044] Code: 02 46 0a 0c 07 08 44 0b 24 00 00 00 10 26 00
00 44 d5 e9 ff dd 00 00 00 00 41 0e 10 86 02 43 0d 06 42 8d 03 49 8c
04 02 61 0a <0c> 07 08 48 0b 00 24 00 00 00 38 26 00 00 fc d5 e9 ff ba
00 00 00
<4>[43486.264046] RSP: 0000:ffffb1c7000afe50 EFLAGS: 00010046
<4>[43486.264048] RAX: ffffffff9483a898 RBX: ffff8d64ef855440 RCX:
0000000000310800
<4>[43486.264049] RDX: 0000000000310800 RSI: 0000000000000000 RDI:
00000000003f0ea0
<4>[43486.264051] RBP: ffffb1c7000afe88 R08: 0000000000000000 R09:
0000000000003000
<4>[43486.264052] R10: 0000000000000500 R11: ffffffff92c6c775 R12:
ffff8d64ef8554c0
<4>[43486.264053] R13: 0000000000000000 R14: 0000000000000082 R15:
ffff8d64ef855460
<4>[43486.264055] FS: 0000000000000000(0000) GS:ffff8d64ef840000(0000)
knlGS:0000000000000000
<4>[43486.264057] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4>[43486.264058] CR2: ffffffff94b483a6 CR3: 0000000391c0c001 CR4:
00000000003f0ea0
<4>[43486.264063] invalid opcode: 0000 [#2] PREEMPT SMP NOPTI
<4>[43486.264065] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G U
5.10.136-19391-gadfe4d4b8c04 #1
b640352a7a0e5f1522aed724296ad63f90c007df
<4>[43486.264066] Hardware name: Google Primus/Primus, BIOS
Google_Primus.14505.145.0 06/23/2022
<4>[43486.264069] RIP: 0010:__show_regs+0x2ed/0x338
<4>[43486.264071] Code: 81 fc 00 04 00 00 75 44 48 f7 05 ca 83 90 01
10 00 00 00 0f 84 fa fd ff ff 31 d2 48 f7 05 b7 83 90 01 10 00 00 00
74 07 31 c9 <0f> 01 ee 89 c2 48 c7 c7 90 38 29 94 4c 89 f6 48 83 c4 28
5b 41 5c
<4>[43486.264072] RSP: 0000:ffffb1c7000afc90 EFLAGS: 00010046
<4>[43486.264074] RAX: 00000000ffff0ff0 RBX: 0000000000000000 RCX:
0000000000000000
<4>[43486.264075] RDX: 0000000000000000 RSI: 0000000000000004 RDI:
ffffffff94cf27f4
<4>[43486.264076] RBP: ffffb1c7000afce0 R08: 0000000000000000 R09:
00000000ffffdfff
<4>[43486.264078] R10: ffffffff94658600 R11: 3fffffffffffffff R12:
0000000000000400
<4>[43486.264079] R13: ffff8d64ef840000 R14: ffffffff9435d0a9 R15:
00000000ffff0ff0
<4>[43486.264080] FS: 0000000000000000(0000) GS:ffff8d64ef840000(0000)
knlGS:0000000000000000
<4>[43486.264082] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
<4>[43486.264083] CR2: ffffffff94b483a6 CR3: 0000000391c0c001 CR4:
00000000003f0ea0
<4>[43486.264085] invalid opcode: 0000 [#3] PREEMPT SMP NOPTI
<4>[43486.264086] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G U
5.10.136-19391-gadfe4d4b8c04 #1
b640352a7a0e5f1522aed724296ad63f90c007df
<4>[43486.264088] Hardware name: Google Primus/Primus, BIOS
Google_Primus.14505.145.0 06/23/2022
<4>[43486.264089] RIP: 0010:__show_regs+0x2ed/0x338

I landed this change, though I'm still working on verifying the issue
goes away with this fix:
https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/3851401

I don't have direct access to this machine, but I wonder if a simple
cpu hotplug might also exercise this path.
-Evan

>
> Thanks,
> Chang
>

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Chang S. Bae Aug. 31, 2022, 11:08 p.m. UTC | #6
On 8/25/2022 8:31 AM, Evan Green wrote:
> 
> Here's the log I've got that pointed me down this path:
> https://pastebin.com/VvR1EHvE

     <3>[43486.261583] x86/keylocker: The key backup access failed with 
read error.
     <3>[43486.261584] x86/keylocker: Failed to restore internal 
wrapping key.

Looks like the IWKey backup was corrupted on that system.

> Relevant bit pasted below:
> 
> <6>[43486.263035] Enabling non-boot CPUs ...
> <6>[43486.263081] x86: Booting SMP configuration:
> <6>[43486.263082] smpboot: Booting Node 0 Processor 1 APIC 0x1
> <2>[43486.264010] kernel tried to execute NX-protected page - exploit
> attempt? (uid: 0)
> <1>[43486.264019] BUG: unable to handle page fault for address: ffffffff94b483a6
> <1>[43486.264021] #PF: supervisor instruction fetch in kernel mode
> <1>[43486.264023] #PF: error_code(0x0011) - permissions violation
> <6>[43486.264025] PGD 391c0e067 P4D 391c0e067 PUD 391c0f063 PMD
> 10006c063 PTE 8000000392148163
> <4>[43486.264031] Oops: 0011 [#1] PREEMPT SMP NOPTI
> <4>[43486.264035] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G U
> 5.10.136-19391-gadfe4d4b8c04 #1
> b640352a7a0e5f1522aed724296ad63f90c007df
> <4>[43486.264036] Hardware name: Google Primus/Primus, BIOS
> Google_Primus.14505.145.0 06/23/2022
> <4>[43486.264042] RIP: 0010:load_keylocker+0x0/0x7f

But, I don't get the reason why it hit this. On the wake-up path, 
copy_keylocker() is supposed to be called.

I added some printout in there, and it looks to be fine with me:

     [  218.488711] Enabling non-boot CPUs ...
     [  218.488794] x86: Booting SMP configuration:
     [  218.488795] smpboot: Booting Node 0 Processor 1 APIC 0x1
     [  218.490634] x86/keylocker: restore processor (id=1)
     [  218.491186] CPU1 is up
     ...

Thanks,
Chang

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Evan Green Sept. 6, 2022, 4:22 p.m. UTC | #7
On Wed, Aug 31, 2022 at 4:16 PM Chang S. Bae <chang.seok.bae@intel.com> wrote:
>
> On 8/25/2022 8:31 AM, Evan Green wrote:
> >
> > Here's the log I've got that pointed me down this path:
> > https://pastebin.com/VvR1EHvE
>
>      <3>[43486.261583] x86/keylocker: The key backup access failed with
> read error.
>      <3>[43486.261584] x86/keylocker: Failed to restore internal
> wrapping key.
>
> Looks like the IWKey backup was corrupted on that system.
>
> > Relevant bit pasted below:
> >
> > <6>[43486.263035] Enabling non-boot CPUs ...
> > <6>[43486.263081] x86: Booting SMP configuration:
> > <6>[43486.263082] smpboot: Booting Node 0 Processor 1 APIC 0x1
> > <2>[43486.264010] kernel tried to execute NX-protected page - exploit
> > attempt? (uid: 0)
> > <1>[43486.264019] BUG: unable to handle page fault for address: ffffffff94b483a6
> > <1>[43486.264021] #PF: supervisor instruction fetch in kernel mode
> > <1>[43486.264023] #PF: error_code(0x0011) - permissions violation
> > <6>[43486.264025] PGD 391c0e067 P4D 391c0e067 PUD 391c0f063 PMD
> > 10006c063 PTE 8000000392148163
> > <4>[43486.264031] Oops: 0011 [#1] PREEMPT SMP NOPTI
> > <4>[43486.264035] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G U
> > 5.10.136-19391-gadfe4d4b8c04 #1
> > b640352a7a0e5f1522aed724296ad63f90c007df
> > <4>[43486.264036] Hardware name: Google Primus/Primus, BIOS
> > Google_Primus.14505.145.0 06/23/2022
> > <4>[43486.264042] RIP: 0010:load_keylocker+0x0/0x7f
>
> But, I don't get the reason why it hit this. On the wake-up path,
> copy_keylocker() is supposed to be called.

Interesting, that's helpful. I thought I had a lead based on this,
which was that in this case we were doing a hibernate to shutdown,
rather than hibernate to S4. The IWKey backup is only valid down to
S4, so a read error on resume from this type of hibernate might make
sense. I know keylocker won't successfully maintain handles across a
hibernate to shutdown and subsequent resume, but it shouldn't crash
either.

But this still doesn't explain this crash, since in this case we're
still on our way down and haven't even done the shutdown yet. We can
see the "PM: hibernation: Image created (1536412 pages copied)" log
line just before the keylocker read failure. So then it seems
something's not working with the pre-hibernate CPU hotplug path?


>
> I added some printout in there, and it looks to be fine with me:
>
>      [  218.488711] Enabling non-boot CPUs ...
>      [  218.488794] x86: Booting SMP configuration:
>      [  218.488795] smpboot: Booting Node 0 Processor 1 APIC 0x1
>      [  218.490634] x86/keylocker: restore processor (id=1)
>      [  218.491186] CPU1 is up
>      ...

How were you exercising the CPU onlining in this case? Boot, cpu
hotplug, or hibernate?
-Evan

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
Chang S. Bae Sept. 6, 2022, 4:46 p.m. UTC | #8
On 9/6/2022 9:22 AM, Evan Green wrote:
> On Wed, Aug 31, 2022 at 4:16 PM Chang S. Bae <chang.seok.bae@intel.com> wrote:
> 
> But this still doesn't explain this crash, since in this case we're
> still on our way down and haven't even done the shutdown yet. We can
> see the "PM: hibernation: Image created (1536412 pages copied)" log
> line just before the keylocker read failure. So then it seems
> something's not working with the pre-hibernate CPU hotplug path?

No, that backup crash message came from the boot-CPU's wakeup path:
__restore_processor_state()->restore_keylocker().

If the issue is repeatable, then I suspect that's something to do with 
the specific platform implementation. I don't have any detail yet on the 
customized systems. Let me check with some Chrome HW folks.

> How were you exercising the CPU onlining in this case? Boot, cpu
> hotplug, or hibernate?

Hibernate.

Thanks,
Chang

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel
diff mbox series

Patch

diff --git a/arch/x86/include/asm/keylocker.h b/arch/x86/include/asm/keylocker.h
index e85dfb6c1524..820ac29c06d9 100644
--- a/arch/x86/include/asm/keylocker.h
+++ b/arch/x86/include/asm/keylocker.h
@@ -5,6 +5,7 @@ 
 
 #ifndef __ASSEMBLY__
 
+#include <asm/processor.h>
 #include <linux/bits.h>
 #include <asm/fpu/types.h>
 
@@ -28,5 +29,13 @@  struct iwkey {
 #define KEYLOCKER_CPUID_EBX_WIDE	BIT(2)
 #define KEYLOCKER_CPUID_EBX_BACKUP	BIT(4)
 
+#ifdef CONFIG_X86_KEYLOCKER
+void setup_keylocker(struct cpuinfo_x86 *c);
+void destroy_keylocker_data(void);
+#else
+#define setup_keylocker(c) do { } while (0)
+#define destroy_keylocker_data() do { } while (0)
+#endif
+
 #endif /*__ASSEMBLY__ */
 #endif /* _ASM_KEYLOCKER_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2ff3e600f426..e15efa238497 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -144,6 +144,7 @@  obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
 obj-$(CONFIG_TRACING)			+= tracepoint.o
 obj-$(CONFIG_SCHED_MC_PRIO)		+= itmt.o
 obj-$(CONFIG_X86_UMIP)			+= umip.o
+obj-$(CONFIG_X86_KEYLOCKER)		+= keylocker.o
 
 obj-$(CONFIG_UNWINDER_ORC)		+= unwind_orc.o
 obj-$(CONFIG_UNWINDER_FRAME_POINTER)	+= unwind_frame.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0083464de5e3..23b4aa437c1e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -57,6 +57,8 @@ 
 #include <asm/microcode_intel.h>
 #include <asm/intel-family.h>
 #include <asm/cpu_device_id.h>
+#include <asm/keylocker.h>
+
 #include <asm/uv/uv.h>
 #include <asm/sigframe.h>
 
@@ -1595,10 +1597,11 @@  static void identify_cpu(struct cpuinfo_x86 *c)
 	/* Disable the PN if appropriate */
 	squash_the_stupid_serial_number(c);
 
-	/* Set up SMEP/SMAP/UMIP */
+	/* Setup various Intel-specific CPU security features */
 	setup_smep(c);
 	setup_smap(c);
 	setup_umip(c);
+	setup_keylocker(c);
 
 	/* Enable FSGSBASE instructions if available. */
 	if (cpu_has(c, X86_FEATURE_FSGSBASE)) {
diff --git a/arch/x86/kernel/keylocker.c b/arch/x86/kernel/keylocker.c
new file mode 100644
index 000000000000..87d775a65716
--- /dev/null
+++ b/arch/x86/kernel/keylocker.c
@@ -0,0 +1,79 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Setup Key Locker feature and support internal wrapping key
+ * management.
+ */
+
+#include <linux/random.h>
+#include <linux/poison.h>
+
+#include <asm/fpu/api.h>
+#include <asm/keylocker.h>
+#include <asm/tlbflush.h>
+
+static __initdata struct keylocker_setup_data {
+	struct iwkey key;
+} kl_setup;
+
+static void __init generate_keylocker_data(void)
+{
+	get_random_bytes(&kl_setup.key.integrity_key,  sizeof(kl_setup.key.integrity_key));
+	get_random_bytes(&kl_setup.key.encryption_key, sizeof(kl_setup.key.encryption_key));
+}
+
+void __init destroy_keylocker_data(void)
+{
+	memset(&kl_setup.key, KEY_DESTROY, sizeof(kl_setup.key));
+}
+
+static void __init load_keylocker(void)
+{
+	kernel_fpu_begin();
+	load_xmm_iwkey(&kl_setup.key);
+	kernel_fpu_end();
+}
+
+/**
+ * setup_keylocker - Enable the feature.
+ * @c:		A pointer to struct cpuinfo_x86
+ */
+void __ref setup_keylocker(struct cpuinfo_x86 *c)
+{
+	/* This feature is not compatible with a hypervisor. */
+	if (!cpu_feature_enabled(X86_FEATURE_KEYLOCKER) ||
+	    cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
+		goto out;
+
+	cr4_set_bits(X86_CR4_KEYLOCKER);
+
+	if (c == &boot_cpu_data) {
+		u32 eax, ebx, ecx, edx;
+
+		cpuid_count(KEYLOCKER_CPUID, 0, &eax, &ebx, &ecx, &edx);
+		/*
+		 * Check the feature readiness via CPUID. Note that the
+		 * CPUID AESKLE bit is conditionally set only when CR4.KL
+		 * is set.
+		 */
+		if (!(ebx & KEYLOCKER_CPUID_EBX_AESKLE) ||
+		    !(eax & KEYLOCKER_CPUID_EAX_SUPERVISOR)) {
+			pr_debug("x86/keylocker: Not fully supported.\n");
+			goto disable;
+		}
+
+		generate_keylocker_data();
+	}
+
+	load_keylocker();
+
+	pr_info_once("x86/keylocker: Enabled.\n");
+	return;
+
+disable:
+	setup_clear_cpu_cap(X86_FEATURE_KEYLOCKER);
+	pr_info_once("x86/keylocker: Disabled.\n");
+out:
+	/* Make sure the feature disabled for kexec-reboot. */
+	cr4_clear_bits(X86_CR4_KEYLOCKER);
+}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 617012f4619f..00cfa64948f5 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -82,6 +82,7 @@ 
 #include <asm/spec-ctrl.h>
 #include <asm/hw_irq.h>
 #include <asm/stackprotector.h>
+#include <asm/keylocker.h>
 
 #ifdef CONFIG_ACPI_CPPC_LIB
 #include <acpi/cppc_acpi.h>
@@ -1489,6 +1490,7 @@  void __init native_smp_cpus_done(unsigned int max_cpus)
 	nmi_selftest();
 	impress_friends();
 	mtrr_aps_init();
+	destroy_keylocker_data();
 }
 
 static int __initdata setup_possible_cpus = -1;