diff mbox series

[v3,06/21] x86/fpu/xstate: Calculate and remember dynamic xstate buffer sizes

Message ID 20201223155717.19556-7-chang.seok.bae@intel.com (mailing list archive)
State New, archived
Headers show
Series [v3,01/21] x86/fpu/xstate: Modify initialization helper to handle both static and dynamic buffers | expand

Commit Message

Chang S. Bae Dec. 23, 2020, 3:57 p.m. UTC
The xstate buffer is currently in-line with static size. To accommodate
dynamic user xstates, introduce variables to represent the maximum and
minimum buffer sizes.

do_extra_xstate_size_checks() calculates the maximum xstate size and sanity
checks it with CPUID. It calculates the static in-line buffer size by
excluding the dynamic user states from the maximum xstate size.

No functional change, until the kernel enables dynamic buffer support.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Reviewed-by: Len Brown <len.brown@intel.com>
Cc: x86@kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
---
Changes from v2:
* Updated the changelog with task->fpu removed. (Boris Petkov)
* Renamed the in-line size variable.
* Updated some code comments.
---
 arch/x86/include/asm/processor.h | 10 +++----
 arch/x86/kernel/fpu/core.c       |  6 ++---
 arch/x86/kernel/fpu/init.c       | 36 ++++++++++++++++---------
 arch/x86/kernel/fpu/signal.c     |  2 +-
 arch/x86/kernel/fpu/xstate.c     | 46 +++++++++++++++++++++-----------
 arch/x86/kernel/process.c        |  6 +++++
 arch/x86/kvm/x86.c               |  2 +-
 7 files changed, 67 insertions(+), 41 deletions(-)

Comments

Borislav Petkov Jan. 22, 2021, 11:44 a.m. UTC | #1
On Wed, Dec 23, 2020 at 07:57:02AM -0800, Chang S. Bae wrote:
> The xstate buffer is currently in-line with static size. To accommodate

"in-line" doesn't fit in this context, especially since "inline"
is a keyword with another meaning. Please replace it with a better
formulation in this patch.

> dynamic user xstates, introduce variables to represent the maximum and
> minimum buffer sizes.
> 
> do_extra_xstate_size_checks() calculates the maximum xstate size and sanity
> checks it with CPUID. It calculates the static in-line buffer size by
> excluding the dynamic user states from the maximum xstate size.
> 
> No functional change, until the kernel enables dynamic buffer support.
> 
> Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
> Reviewed-by: Len Brown <len.brown@intel.com>
> Cc: x86@kernel.org
> Cc: linux-kernel@vger.kernel.org
> Cc: kvm@vger.kernel.org
> ---
> Changes from v2:
> * Updated the changelog with task->fpu removed. (Boris Petkov)
> * Renamed the in-line size variable.
> * Updated some code comments.
> ---
>  arch/x86/include/asm/processor.h | 10 +++----
>  arch/x86/kernel/fpu/core.c       |  6 ++---
>  arch/x86/kernel/fpu/init.c       | 36 ++++++++++++++++---------
>  arch/x86/kernel/fpu/signal.c     |  2 +-
>  arch/x86/kernel/fpu/xstate.c     | 46 +++++++++++++++++++++-----------
>  arch/x86/kernel/process.c        |  6 +++++
>  arch/x86/kvm/x86.c               |  2 +-
>  7 files changed, 67 insertions(+), 41 deletions(-)
> 
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 82a08b585818..c9c608f8af91 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -477,7 +477,8 @@ DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
>  DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
>  #endif	/* X86_64 */
>  
> -extern unsigned int fpu_kernel_xstate_size;
> +extern unsigned int fpu_kernel_xstate_min_size;
> +extern unsigned int fpu_kernel_xstate_max_size;

Is it time to group this into a struct so that all those settings go
together instead of in single variables?

struct fpu_xstate {
	unsigned int min_size, max_size;
	unsigned int user_size;
	...
};

etc.

>  extern unsigned int fpu_user_xstate_size;
>  
>  struct perf_event;
> @@ -545,12 +546,7 @@ struct thread_struct {
>  };
>  
>  /* Whitelist the FPU state from the task_struct for hardened usercopy. */
> -static inline void arch_thread_struct_whitelist(unsigned long *offset,
> -						unsigned long *size)
> -{
> -	*offset = offsetof(struct thread_struct, fpu.state);
> -	*size = fpu_kernel_xstate_size;
> -}
> +extern void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size);

What's that move for?

> diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
> index 74e03e3bc20f..5dac97158030 100644
> --- a/arch/x86/kernel/fpu/init.c
> +++ b/arch/x86/kernel/fpu/init.c
> @@ -130,13 +130,20 @@ static void __init fpu__init_system_generic(void)
>  }
>  
>  /*
> - * Size of the FPU context state. All tasks in the system use the
> - * same context size, regardless of what portion they use.
> - * This is inherent to the XSAVE architecture which puts all state
> - * components into a single, continuous memory block:
> + * Size of the minimally allocated FPU context state. All threads have this amount
> + * of xstate buffer at minimum.
> + *
> + * This buffer is inherent to the XSAVE architecture which puts all state components
> + * into a single, continuous memory block:
> + */
> +unsigned int fpu_kernel_xstate_min_size;
> +EXPORT_SYMBOL_GPL(fpu_kernel_xstate_min_size);
> +
> +/*
> + * Size of the maximum FPU context state. When using the compacted format, the buffer
> + * can be dynamically expanded to include some states up to this size.
>   */
> -unsigned int fpu_kernel_xstate_size;
> -EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
> +unsigned int fpu_kernel_xstate_max_size;

And since we're probably going to start querying different aspects about
the buffer, instead of exporting all kinds of variables in the future,
maybe this should be a single exported function called

get_xstate_buffer_attr(typedef buffer_attr)

which gives you what you wanna know about it... For example:

get_xstate_buffer_attr(MIN_SIZE);
get_xstate_buffer_attr(MAX_SIZE);
...

> diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
> index 414a13427934..b6d2706b6886 100644
> --- a/arch/x86/kernel/fpu/signal.c
> +++ b/arch/x86/kernel/fpu/signal.c
> @@ -289,8 +289,8 @@ static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
>  
>  static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
>  {
> +	int state_size = fpu_kernel_xstate_min_size;
>  	struct user_i387_ia32_struct *envp = NULL;
> -	int state_size = fpu_kernel_xstate_size;
>  	int ia32_fxstate = (buf != buf_fx);
>  	struct task_struct *tsk = current;
>  	struct fpu *fpu = &tsk->thread.fpu;
> diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
> index 6620d0a3caff..2012b17b1793 100644
> --- a/arch/x86/kernel/fpu/xstate.c
> +++ b/arch/x86/kernel/fpu/xstate.c
> @@ -627,13 +627,18 @@ static void check_xstate_against_struct(int nr)
>   */

<-- There's a comment over this function that might need adjustment.

>  static void do_extra_xstate_size_checks(void)
>  {
> -	int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
> +	int paranoid_min_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
> +	int paranoid_max_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
>  	int i;

...

> @@ -744,27 +758,27 @@ static bool is_supported_xstate_size(unsigned int test_xstate_size)
>  static int __init init_xstate_size(void)
>  {
>  	/* Recompute the context size for enabled features: */
> -	unsigned int possible_xstate_size;
> +	unsigned int possible_max_xstate_size;
>  	unsigned int xsave_size;
>  
>  	xsave_size = get_xsave_size();
>  
>  	if (boot_cpu_has(X86_FEATURE_XSAVES))

using_compacted_format()

FPU code needs to agree on one helper and not use both. :-\

> -		possible_xstate_size = get_xsaves_size_no_dynamic();
> +		possible_max_xstate_size = get_xsaves_size_no_dynamic();
>  	else
> -		possible_xstate_size = xsave_size;
> -
> -	/* Ensure we have the space to store all enabled: */
> -	if (!is_supported_xstate_size(possible_xstate_size))
> -		return -EINVAL;
> +		possible_max_xstate_size = xsave_size;
>  
>  	/*
>  	 * The size is OK, we are definitely going to use xsave,
>  	 * make it known to the world that we need more space.
>  	 */
> -	fpu_kernel_xstate_size = possible_xstate_size;
> +	fpu_kernel_xstate_max_size = possible_max_xstate_size;
>  	do_extra_xstate_size_checks();
>  
> +	/* Ensure we have the supported in-line space: */

Who's "we"?

> +	if (!is_supported_xstate_size(fpu_kernel_xstate_min_size))
> +		return -EINVAL;
> +
>  	/*
>  	 * User space is always in standard format.
>  	 */
Chang S. Bae Jan. 27, 2021, 1:23 a.m. UTC | #2
On Jan 22, 2021, at 03:44, Borislav Petkov <bp@suse.de> wrote:
> On Wed, Dec 23, 2020 at 07:57:02AM -0800, Chang S. Bae wrote:
>> The xstate buffer is currently in-line with static size. To accommodate
> 
> "in-line" doesn't fit in this context, especially since "inline"
> is a keyword with another meaning. Please replace it with a better
> formulation in this patch.

How about ‘embedded’?
    “The xstate buffer is currently embedded into struct fpu with static size."

>> -extern unsigned int fpu_kernel_xstate_size;
>> +extern unsigned int fpu_kernel_xstate_min_size;
>> +extern unsigned int fpu_kernel_xstate_max_size;
> 
> Is it time to group this into a struct so that all those settings go
> together instead of in single variables?
> 
> struct fpu_xstate {
> 	unsigned int min_size, max_size;
> 	unsigned int user_size;
> 	...
> };
> 
> etc.

<snip>

> And since we're probably going to start querying different aspects about
> the buffer, instead of exporting all kinds of variables in the future,
> maybe this should be a single exported function called
> 
> get_xstate_buffer_attr(typedef buffer_attr)
> 
> which gives you what you wanna know about it... For example:
> 
> get_xstate_buffer_attr(MIN_SIZE);
> get_xstate_buffer_attr(MAX_SIZE);
> ...

Okay. I will prepare a separate cleanup patch that can be applied at the end
of the series. Will post the change in this thread at first.

>> /* Whitelist the FPU state from the task_struct for hardened usercopy. */
>> -static inline void arch_thread_struct_whitelist(unsigned long *offset,
>> -						unsigned long *size)
>> -{
>> -	*offset = offsetof(struct thread_struct, fpu.state);
>> -	*size = fpu_kernel_xstate_size;
>> -}
>> +extern void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size);
> 
> What's that move for?

One of my drafts had some internal helper to be called in there. No reason
prior to applying the get_xstate_buffer_attr() helper. But with it, better to
move this out of this header file I think.

>> @@ -627,13 +627,18 @@ static void check_xstate_against_struct(int nr)
>>  */
> 
> <-- There's a comment over this function that might need adjustment.

Do you mean an empty line? (Just want to clarify.)

>> static void do_extra_xstate_size_checks(void)
>> {

<snip>

>> 	if (boot_cpu_has(X86_FEATURE_XSAVES))
> 
> using_compacted_format()
> 
> FPU code needs to agree on one helper and not use both. :-\

Agreed. I will prepare a patch. At least will post the diff here.

<snip>

>> +	/* Ensure we have the supported in-line space: */
> 
> Who's "we"?

How about:
    “Ensure the size fits in the statically-allocated buffer:"

>> +	if (!is_supported_xstate_size(fpu_kernel_xstate_min_size))
>> +		return -EINVAL;

No excuse, just pointing out the upstream code has “we” there [1].

Thanks,
Chang

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/kernel/fpu/xstate.c#n752
Borislav Petkov Jan. 27, 2021, 9:38 a.m. UTC | #3
On Wed, Jan 27, 2021 at 01:23:35AM +0000, Bae, Chang Seok wrote:
> How about ‘embedded’?
>     “The xstate buffer is currently embedded into struct fpu with static size."

Better.

> Okay. I will prepare a separate cleanup patch that can be applied at the end
> of the series. Will post the change in this thread at first.

No, this is not how this works. Imagine you pile up a patch at the end
for each review feedback you've gotten. No, this will be an insane churn
and an unreviewable mess.

What you do is you rework your patches like everyone else.

Also, thinking about this more, I'm wondering if all those
xstate-related attributes shouldn't be part of struct fpu instead of
being scattered around like that.

That thing - struct fpu * - gets passed in everywhere anyway so all that
min_size, max_size, ->xstate_ptr and whatever, looks like it wants to be
part of struct fpu. Then maybe you won't need the accessors...

> One of my drafts had some internal helper to be called in there. No reason
> prior to applying the get_xstate_buffer_attr() helper. But with it, better to
> move this out of this header file I think.

See above.

> 
> >> @@ -627,13 +627,18 @@ static void check_xstate_against_struct(int nr)
> >>  */
> > 
> > <-- There's a comment over this function that might need adjustment.
> 
> Do you mean an empty line? (Just want to clarify.)

No, I mean this comment:

 * Dynamic XSAVE features allocate their own buffers and are not
 * covered by these checks. Only the size of the buffer for task->fpu
 * is checked here.

That probably needs adjusting as you do set min and max size here now
for the dynamic buffer.

> Agreed. I will prepare a patch. At least will post the diff here.

You can send it separately from this patchset, ontop of current
tip/master, so that I can take it now.

> How about:
>     “Ensure the size fits in the statically-allocated buffer:"

Yep.

> No excuse, just pointing out the upstream code has “we” there [1].

Yeah, I know. :-\

But considering how many parties develop the kernel now, "we" becomes
really ambiguous.

Thx.
Chang S. Bae Feb. 3, 2021, 2:54 a.m. UTC | #4
On Jan 27, 2021, at 01:38, Borislav Petkov <bp@suse.de> wrote:
> On Wed, Jan 27, 2021 at 01:23:35AM +0000, Bae, Chang Seok wrote:
>> Okay. I will prepare a separate cleanup patch that can be applied at the end
>> of the series. Will post the change in this thread at first.
> 
> No, this is not how this works. Imagine you pile up a patch at the end
> for each review feedback you've gotten. No, this will be an insane churn
> and an unreviewable mess.
> 
> What you do is you rework your patches like everyone else.

Yeah, it makes sense. I will post v4.

> Also, thinking about this more, I'm wondering if all those
> xstate-related attributes shouldn't be part of struct fpu instead of
> being scattered around like that.
> 
> That thing - struct fpu * - gets passed in everywhere anyway so all that
> min_size, max_size, ->xstate_ptr and whatever, looks like it wants to be
> part of struct fpu. Then maybe you won't need the accessors...

Well, min_size and max_size are not task-specific. So, it will be wasteful to
include in struct fpu.

I will follow your suggestion to add new helpers to access the size values,
instead of exporting them.

>>>> @@ -627,13 +627,18 @@ static void check_xstate_against_struct(int nr)
>>>> */
>>> 
>>> <-- There's a comment over this function that might need adjustment.
>> 
>> Do you mean an empty line? (Just want to clarify.)
> 
> No, I mean this comment:
> 
> * Dynamic XSAVE features allocate their own buffers and are not
> * covered by these checks. Only the size of the buffer for task->fpu
> * is checked here.
> 
> That probably needs adjusting as you do set min and max size here now
> for the dynamic buffer.

Oh, I see. Thank you.

>> Agreed. I will prepare a patch. At least will post the diff here.
> 
> You can send it separately from this patchset, ontop of current
> tip/master, so that I can take it now.

Posted, [1]. After all, the proposal is to remove the helper.

Thanks,
Chang

[1] https://lore.kernel.org/lkml/20210203024052.15789-1-chang.seok.bae@intel.com/
diff mbox series

Patch

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 82a08b585818..c9c608f8af91 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -477,7 +477,8 @@  DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
 #endif	/* X86_64 */
 
-extern unsigned int fpu_kernel_xstate_size;
+extern unsigned int fpu_kernel_xstate_min_size;
+extern unsigned int fpu_kernel_xstate_max_size;
 extern unsigned int fpu_user_xstate_size;
 
 struct perf_event;
@@ -545,12 +546,7 @@  struct thread_struct {
 };
 
 /* Whitelist the FPU state from the task_struct for hardened usercopy. */
-static inline void arch_thread_struct_whitelist(unsigned long *offset,
-						unsigned long *size)
-{
-	*offset = offsetof(struct thread_struct, fpu.state);
-	*size = fpu_kernel_xstate_size;
-}
+extern void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size);
 
 /*
  * Thread-synchronous status.
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 20925cae2a84..1a428803e6b2 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -206,7 +206,7 @@  void fpstate_init(struct fpu *fpu)
 		return;
 	}
 
-	memset(state, 0, fpu_kernel_xstate_size);
+	memset(state, 0, fpu_kernel_xstate_min_size);
 
 	if (static_cpu_has(X86_FEATURE_XSAVES))
 		fpstate_init_xstate(&state->xsave, xfeatures_mask_all);
@@ -233,7 +233,7 @@  int fpu__copy(struct task_struct *dst, struct task_struct *src)
 	 * Don't let 'init optimized' areas of the XSAVE area
 	 * leak into the child task:
 	 */
-	memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+	memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_min_size);
 
 	/*
 	 * If the FPU registers are not current just memcpy() the state.
@@ -245,7 +245,7 @@  int fpu__copy(struct task_struct *dst, struct task_struct *src)
 	 */
 	fpregs_lock();
 	if (test_thread_flag(TIF_NEED_FPU_LOAD))
-		memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
+		memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_min_size);
 
 	else if (!copy_fpregs_to_fpstate(dst_fpu))
 		copy_kernel_to_fpregs(dst_fpu);
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 74e03e3bc20f..5dac97158030 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -130,13 +130,20 @@  static void __init fpu__init_system_generic(void)
 }
 
 /*
- * Size of the FPU context state. All tasks in the system use the
- * same context size, regardless of what portion they use.
- * This is inherent to the XSAVE architecture which puts all state
- * components into a single, continuous memory block:
+ * Size of the minimally allocated FPU context state. All threads have this amount
+ * of xstate buffer at minimum.
+ *
+ * This buffer is inherent to the XSAVE architecture which puts all state components
+ * into a single, continuous memory block:
+ */
+unsigned int fpu_kernel_xstate_min_size;
+EXPORT_SYMBOL_GPL(fpu_kernel_xstate_min_size);
+
+/*
+ * Size of the maximum FPU context state. When using the compacted format, the buffer
+ * can be dynamically expanded to include some states up to this size.
  */
-unsigned int fpu_kernel_xstate_size;
-EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
+unsigned int fpu_kernel_xstate_max_size;
 
 /* Get alignment of the TYPE. */
 #define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
@@ -167,8 +174,10 @@  static void __init fpu__init_task_struct_size(void)
 	/*
 	 * Add back the dynamically-calculated register state
 	 * size.
+	 *
+	 * Use the minimum size as in-lined to the task_struct.
 	 */
-	task_size += fpu_kernel_xstate_size;
+	task_size += fpu_kernel_xstate_min_size;
 
 	/*
 	 * We dynamically size 'struct fpu', so we require that
@@ -193,6 +202,7 @@  static void __init fpu__init_task_struct_size(void)
 static void __init fpu__init_system_xstate_size_legacy(void)
 {
 	static int on_boot_cpu __initdata = 1;
+	unsigned int size;
 
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
@@ -203,17 +213,17 @@  static void __init fpu__init_system_xstate_size_legacy(void)
 	 */
 
 	if (!boot_cpu_has(X86_FEATURE_FPU)) {
-		fpu_kernel_xstate_size = sizeof(struct swregs_state);
+		size = sizeof(struct swregs_state);
 	} else {
 		if (boot_cpu_has(X86_FEATURE_FXSR))
-			fpu_kernel_xstate_size =
-				sizeof(struct fxregs_state);
+			size = sizeof(struct fxregs_state);
 		else
-			fpu_kernel_xstate_size =
-				sizeof(struct fregs_state);
+			size = sizeof(struct fregs_state);
 	}
 
-	fpu_user_xstate_size = fpu_kernel_xstate_size;
+	fpu_kernel_xstate_min_size = size;
+	fpu_kernel_xstate_max_size = size;
+	fpu_user_xstate_size = size;
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 414a13427934..b6d2706b6886 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -289,8 +289,8 @@  static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
 
 static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 {
+	int state_size = fpu_kernel_xstate_min_size;
 	struct user_i387_ia32_struct *envp = NULL;
-	int state_size = fpu_kernel_xstate_size;
 	int ia32_fxstate = (buf != buf_fx);
 	struct task_struct *tsk = current;
 	struct fpu *fpu = &tsk->thread.fpu;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 6620d0a3caff..2012b17b1793 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -627,13 +627,18 @@  static void check_xstate_against_struct(int nr)
  */
 static void do_extra_xstate_size_checks(void)
 {
-	int paranoid_xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+	int paranoid_min_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+	int paranoid_max_size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
 	int i;
 
 	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
+		bool dynamic;
+
 		if (!xfeature_enabled(i))
 			continue;
 
+		dynamic = (xfeatures_mask_user_dynamic & BIT_ULL(i)) ? true : false;
+
 		check_xstate_against_struct(i);
 		/*
 		 * Supervisor state components can be managed only by
@@ -643,23 +648,32 @@  static void do_extra_xstate_size_checks(void)
 			XSTATE_WARN_ON(xfeature_is_supervisor(i));
 
 		/* Align from the end of the previous feature */
-		if (xfeature_is_aligned(i))
-			paranoid_xstate_size = ALIGN(paranoid_xstate_size, 64);
+		if (xfeature_is_aligned(i)) {
+			paranoid_max_size = ALIGN(paranoid_max_size, 64);
+			if (!dynamic)
+				paranoid_min_size = ALIGN(paranoid_min_size, 64);
+		}
 		/*
 		 * The offset of a given state in the non-compacted
 		 * format is given to us in a CPUID leaf.  We check
 		 * them for being ordered (increasing offsets) in
 		 * setup_xstate_features().
 		 */
-		if (!using_compacted_format())
-			paranoid_xstate_size = xfeature_uncompacted_offset(i);
+		if (!using_compacted_format()) {
+			paranoid_max_size = xfeature_uncompacted_offset(i);
+			if (!dynamic)
+				paranoid_min_size = xfeature_uncompacted_offset(i);
+		}
 		/*
 		 * The compacted-format offset always depends on where
 		 * the previous state ended.
 		 */
-		paranoid_xstate_size += xfeature_size(i);
+		paranoid_max_size += xfeature_size(i);
+		if (!dynamic)
+			paranoid_min_size += xfeature_size(i);
 	}
-	XSTATE_WARN_ON(paranoid_xstate_size != fpu_kernel_xstate_size);
+	XSTATE_WARN_ON(paranoid_max_size != fpu_kernel_xstate_max_size);
+	fpu_kernel_xstate_min_size = paranoid_min_size;
 }
 
 
@@ -744,27 +758,27 @@  static bool is_supported_xstate_size(unsigned int test_xstate_size)
 static int __init init_xstate_size(void)
 {
 	/* Recompute the context size for enabled features: */
-	unsigned int possible_xstate_size;
+	unsigned int possible_max_xstate_size;
 	unsigned int xsave_size;
 
 	xsave_size = get_xsave_size();
 
 	if (boot_cpu_has(X86_FEATURE_XSAVES))
-		possible_xstate_size = get_xsaves_size_no_dynamic();
+		possible_max_xstate_size = get_xsaves_size_no_dynamic();
 	else
-		possible_xstate_size = xsave_size;
-
-	/* Ensure we have the space to store all enabled: */
-	if (!is_supported_xstate_size(possible_xstate_size))
-		return -EINVAL;
+		possible_max_xstate_size = xsave_size;
 
 	/*
 	 * The size is OK, we are definitely going to use xsave,
 	 * make it known to the world that we need more space.
 	 */
-	fpu_kernel_xstate_size = possible_xstate_size;
+	fpu_kernel_xstate_max_size = possible_max_xstate_size;
 	do_extra_xstate_size_checks();
 
+	/* Ensure we have the supported in-line space: */
+	if (!is_supported_xstate_size(fpu_kernel_xstate_min_size))
+		return -EINVAL;
+
 	/*
 	 * User space is always in standard format.
 	 */
@@ -869,7 +883,7 @@  void __init fpu__init_system_xstate(void)
 
 	pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
 		xfeatures_mask_all,
-		fpu_kernel_xstate_size,
+		fpu_kernel_xstate_max_size,
 		boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
 	return;
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 145a7ac0c19a..326b16aefb06 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -96,6 +96,12 @@  int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	return fpu__copy(dst, src);
 }
 
+void arch_thread_struct_whitelist(unsigned long *offset, unsigned long *size)
+{
+	*offset = offsetof(struct thread_struct, fpu.state);
+	*size = fpu_kernel_xstate_min_size;
+}
+
 /*
  * Free thread data structures etc..
  */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a087bbf252b6..4aecfba04bd3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9220,7 +9220,7 @@  static void kvm_save_current_fpu(struct fpu *fpu)
 	 */
 	if (test_thread_flag(TIF_NEED_FPU_LOAD))
 		memcpy(&fpu->state, &current->thread.fpu.state,
-		       fpu_kernel_xstate_size);
+		       fpu_kernel_xstate_min_size);
 	else
 		copy_fpregs_to_fpstate(fpu);
 }