diff mbox series

[11/22] x86/fpu: Make get_xsave_field_ptr() and get_xsave_addr() use feature number instead of mask

Message ID 20190109114744.10936-12-bigeasy@linutronix.de (mailing list archive)
State New, archived
Headers show
Series [v6] x86: load FPU registers on return to userland | expand

Commit Message

Sebastian Andrzej Siewior Jan. 9, 2019, 11:47 a.m. UTC
After changing the argument of __raw_xsave_addr() from a mask to number
Dave suggested to check if it makes sense to do the same for
get_xsave_addr(). As it turns out it does. Only get_xsave_addr() needs
the mask to check if the requested feature is part of what is
support/saved and then uses the number again. The shift operation is
cheaper compared to "find last bit set". Also, the feature number uses
less opcode space compared to the mask :)

Make get_xsave_addr() argument a xfeature number instead of mask and fix
up its callers.
As part of this use xfeature_nr and xfeature_mask consistently.
This results in changes to the kvm code as:
	feature -> xfeature_mask
	index -> xfeature_nr

Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/x86/include/asm/fpu/xstate.h |  4 ++--
 arch/x86/kernel/fpu/xstate.c      | 23 +++++++++++------------
 arch/x86/kernel/traps.c           |  2 +-
 arch/x86/kvm/x86.c                | 28 ++++++++++++++--------------
 arch/x86/mm/mpx.c                 |  6 +++---
 5 files changed, 31 insertions(+), 32 deletions(-)

Comments

Borislav Petkov Jan. 28, 2019, 6:49 p.m. UTC | #1
On Wed, Jan 09, 2019 at 12:47:33PM +0100, Sebastian Andrzej Siewior wrote:
> After changing the argument of __raw_xsave_addr() from a mask to number
> Dave suggested to check if it makes sense to do the same for
> get_xsave_addr(). As it turns out it does. Only get_xsave_addr() needs
> the mask to check if the requested feature is part of what is
> support/saved and then uses the number again. The shift operation is
> cheaper compared to "find last bit set". Also, the feature number uses
> less opcode space compared to the mask :)
> 
> Make get_xsave_addr() argument a xfeature number instead of mask and fix
> up its callers.
> As part of this use xfeature_nr and xfeature_mask consistently.

Good!

> This results in changes to the kvm code as:
> 	feature -> xfeature_mask
> 	index -> xfeature_nr
> 
> Suggested-by: Dave Hansen <dave.hansen@linux.intel.com>
> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
> ---
>  arch/x86/include/asm/fpu/xstate.h |  4 ++--
>  arch/x86/kernel/fpu/xstate.c      | 23 +++++++++++------------
>  arch/x86/kernel/traps.c           |  2 +-
>  arch/x86/kvm/x86.c                | 28 ++++++++++++++--------------
>  arch/x86/mm/mpx.c                 |  6 +++---
>  5 files changed, 31 insertions(+), 32 deletions(-)
> 
> diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
> index 48581988d78c7..fbe41f808e5d8 100644
> --- a/arch/x86/include/asm/fpu/xstate.h
> +++ b/arch/x86/include/asm/fpu/xstate.h
> @@ -46,8 +46,8 @@ extern void __init update_regset_xstate_info(unsigned int size,
>  					     u64 xstate_mask);
>  
>  void fpu__xstate_clear_all_cpu_caps(void);
> -void *get_xsave_addr(struct xregs_state *xsave, int xstate);
> -const void *get_xsave_field_ptr(int xstate_field);
> +void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
> +const void *get_xsave_field_ptr(int xfeature_nr);
>  int using_compacted_format(void);
>  int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
>  int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
> diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
> index 0e759a032c1c7..d288e4d271b71 100644
> --- a/arch/x86/kernel/fpu/xstate.c
> +++ b/arch/x86/kernel/fpu/xstate.c
> @@ -830,15 +830,15 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
>   *
>   * Inputs:
>   *	xstate: the thread's storage area for all FPU data
> - *	xstate_feature: state which is defined in xsave.h (e.g.
> - *	XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...)
> + *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
> + *	XFEATURE_SSE, etc...)
>   * Output:
>   *	address of the state in the xsave area, or NULL if the
>   *	field is not present in the xsave buffer.
>   */
> -void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
> +void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
>  {
> -	int xfeature_nr;
> +	u64 xfeature_mask = 1ULL << xfeature_nr;

You can paste directly BIT_ULL(xfeature_nr) where you need it in this
function...

>  	/*
>  	 * Do we even *have* xsave state?
>  	 */
> @@ -850,11 +850,11 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
>  	 * have not enabled.  Remember that pcntxt_mask is
>  	 * what we write to the XCR0 register.
>  	 */
> -	WARN_ONCE(!(xfeatures_mask & xstate_feature),
> +	WARN_ONCE(!(xfeatures_mask & xfeature_mask),

... and turn this into:

	WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr))

which is more readable than the AND of two variables which I had to
re-focus my eyes to see the difference. :)

Oh and this way, gcc generates better code by doing simply a BT
directly:

# arch/x86/kernel/fpu/xstate.c:852:     WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
        .loc 1 852 2 view .LVU258
        movq    xfeatures_mask(%rip), %rax      # xfeatures_mask, tmp124
        btq     %rsi, %rax      # xfeature_nr, tmp124


without first computing the shift into xfeature_mask:

# arch/x86/kernel/fpu/xstate.c:841:     u64 xfeature_mask = 1ULL << xfeature_nr;
        .loc 1 841 6 view .LVU249
        movl    %esi, %ecx      # xfeature_nr, tmp120
        movl    $1, %ebp        #, tmp105
        salq    %cl, %rbp       # tmp120, xfeature_mask

and then testing it:

# arch/x86/kernel/fpu/xstate.c:853:     WARN_ONCE(!(xfeatures_mask & xfeature_mask),
        .loc 1 853 2 view .LVU256
        testq   %rbp, xfeatures_mask(%rip)      # xfeature_mask, xfeatures_mask
        movq    %rdi, %rbx      # xsave, xsave


Otherwise a nice cleanup!

Thx.
Sebastian Andrzej Siewior Feb. 7, 2019, 11:13 a.m. UTC | #2
On 2019-01-28 19:49:59 [+0100], Borislav Petkov wrote:
> > --- a/arch/x86/kernel/fpu/xstate.c
> > +++ b/arch/x86/kernel/fpu/xstate.c
> > @@ -830,15 +830,15 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)> > -void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
> > +void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
> >  {
> > -	int xfeature_nr;
> > +	u64 xfeature_mask = 1ULL << xfeature_nr;
> 
> You can paste directly BIT_ULL(xfeature_nr) where you need it in this
> function...
changed.

> > @@ -850,11 +850,11 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
> >  	 * have not enabled.  Remember that pcntxt_mask is
> >  	 * what we write to the XCR0 register.
> >  	 */
> > -	WARN_ONCE(!(xfeatures_mask & xstate_feature),
> > +	WARN_ONCE(!(xfeatures_mask & xfeature_mask),
> 
> ... and turn this into:
> 
> 	WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr))
>
> which is more readable than the AND of two variables which I had to
> re-focus my eyes to see the difference. :)
> 
you mean with vs without the `s' ?

> Oh and this way, gcc generates better code by doing simply a BT
> directly:
> 
> # arch/x86/kernel/fpu/xstate.c:852:     WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
>         .loc 1 852 2 view .LVU258
>         movq    xfeatures_mask(%rip), %rax      # xfeatures_mask, tmp124
>         btq     %rsi, %rax      # xfeature_nr, tmp124

interesting. gcc should know that it can use btq or shift + and because
it has all the raw data.
Anyway, I replaced the two user of xfeature_mask with
BIT_ULL(xfeature_nr).

> Thx.

Sebastian
Borislav Petkov Feb. 13, 2019, 9:31 a.m. UTC | #3
On Thu, Feb 07, 2019 at 12:13:40PM +0100, Sebastian Andrzej Siewior wrote:
> you mean with vs without the `s' ?

Yahaa. :)
diff mbox series

Patch

diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 48581988d78c7..fbe41f808e5d8 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -46,8 +46,8 @@  extern void __init update_regset_xstate_info(unsigned int size,
 					     u64 xstate_mask);
 
 void fpu__xstate_clear_all_cpu_caps(void);
-void *get_xsave_addr(struct xregs_state *xsave, int xstate);
-const void *get_xsave_field_ptr(int xstate_field);
+void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+const void *get_xsave_field_ptr(int xfeature_nr);
 int using_compacted_format(void);
 int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
 int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 0e759a032c1c7..d288e4d271b71 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -830,15 +830,15 @@  static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
  *
  * Inputs:
  *	xstate: the thread's storage area for all FPU data
- *	xstate_feature: state which is defined in xsave.h (e.g.
- *	XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...)
+ *	xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
+ *	XFEATURE_SSE, etc...)
  * Output:
  *	address of the state in the xsave area, or NULL if the
  *	field is not present in the xsave buffer.
  */
-void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
+void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
 {
-	int xfeature_nr;
+	u64 xfeature_mask = 1ULL << xfeature_nr;
 	/*
 	 * Do we even *have* xsave state?
 	 */
@@ -850,11 +850,11 @@  void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
 	 * have not enabled.  Remember that pcntxt_mask is
 	 * what we write to the XCR0 register.
 	 */
-	WARN_ONCE(!(xfeatures_mask & xstate_feature),
+	WARN_ONCE(!(xfeatures_mask & xfeature_mask),
 		  "get of unsupported state");
 	/*
 	 * This assumes the last 'xsave*' instruction to
-	 * have requested that 'xstate_feature' be saved.
+	 * have requested that 'xfeature_mask' be saved.
 	 * If it did not, we might be seeing and old value
 	 * of the field in the buffer.
 	 *
@@ -863,10 +863,9 @@  void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
 	 * or because the "init optimization" caused it
 	 * to not be saved.
 	 */
-	if (!(xsave->header.xfeatures & xstate_feature))
+	if (!(xsave->header.xfeatures & xfeature_mask))
 		return NULL;
 
-	xfeature_nr = fls64(xstate_feature) - 1;
 	return __raw_xsave_addr(xsave, xfeature_nr);
 }
 EXPORT_SYMBOL_GPL(get_xsave_addr);
@@ -882,13 +881,13 @@  EXPORT_SYMBOL_GPL(get_xsave_addr);
  * Note that this only works on the current task.
  *
  * Inputs:
- *	@xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
- *	XFEATURE_MASK_SSE, etc...)
+ *	@xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
+ *	XFEATURE_SSE, etc...)
  * Output:
  *	address of the state in the xsave area or NULL if the state
  *	is not present or is in its 'init state'.
  */
-const void *get_xsave_field_ptr(int xsave_state)
+const void *get_xsave_field_ptr(int xfeature_nr)
 {
 	struct fpu *fpu = &current->thread.fpu;
 
@@ -898,7 +897,7 @@  const void *get_xsave_field_ptr(int xsave_state)
 	 */
 	fpu__save(fpu);
 
-	return get_xsave_addr(&fpu->state.xsave, xsave_state);
+	return get_xsave_addr(&fpu->state.xsave, xfeature_nr);
 }
 
 #ifdef CONFIG_ARCH_HAS_PKEYS
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9b7c4ca8f0a73..626853b2ac344 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -455,7 +455,7 @@  dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
 	 * which is all zeros which indicates MPX was not
 	 * responsible for the exception.
 	 */
-	bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
+	bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
 	if (!bndcsr)
 		goto exit_trap;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 02c8e095a2390..6c21aa5c00e58 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3662,15 +3662,15 @@  static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 	 */
 	valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
 	while (valid) {
-		u64 feature = valid & -valid;
-		int index = fls64(feature) - 1;
-		void *src = get_xsave_addr(xsave, feature);
+		u64 xfeature_mask = valid & -valid;
+		int xfeature_nr = fls64(xfeature_mask) - 1;
+		void *src = get_xsave_addr(xsave, xfeature_nr);
 
 		if (src) {
 			u32 size, offset, ecx, edx;
-			cpuid_count(XSTATE_CPUID, index,
+			cpuid_count(XSTATE_CPUID, xfeature_nr,
 				    &size, &offset, &ecx, &edx);
-			if (feature == XFEATURE_MASK_PKRU)
+			if (xfeature_nr == XFEATURE_PKRU)
 				memcpy(dest + offset, &vcpu->arch.pkru,
 				       sizeof(vcpu->arch.pkru));
 			else
@@ -3678,7 +3678,7 @@  static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
 
 		}
 
-		valid -= feature;
+		valid -= xfeature_mask;
 	}
 }
 
@@ -3705,22 +3705,22 @@  static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
 	 */
 	valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
 	while (valid) {
-		u64 feature = valid & -valid;
-		int index = fls64(feature) - 1;
-		void *dest = get_xsave_addr(xsave, feature);
+		u64 xfeature_mask = valid & -valid;
+		int xfeature_nr = fls64(xfeature_mask) - 1;
+		void *dest = get_xsave_addr(xsave, xfeature_nr);
 
 		if (dest) {
 			u32 size, offset, ecx, edx;
-			cpuid_count(XSTATE_CPUID, index,
+			cpuid_count(XSTATE_CPUID, xfeature_nr,
 				    &size, &offset, &ecx, &edx);
-			if (feature == XFEATURE_MASK_PKRU)
+			if (xfeature_nr == XFEATURE_PKRU)
 				memcpy(&vcpu->arch.pkru, src + offset,
 				       sizeof(vcpu->arch.pkru));
 			else
 				memcpy(dest, src + offset, size);
 		}
 
-		valid -= feature;
+		valid -= xfeature_mask;
 	}
 }
 
@@ -8804,11 +8804,11 @@  void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		if (init_event)
 			kvm_put_guest_fpu(vcpu);
 		mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
-					XFEATURE_MASK_BNDREGS);
+					XFEATURE_BNDREGS);
 		if (mpx_state_buffer)
 			memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
 		mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
-					XFEATURE_MASK_BNDCSR);
+					XFEATURE_BNDCSR);
 		if (mpx_state_buffer)
 			memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
 		if (init_event)
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index de1851d156997..c1ec9d81c627c 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -142,7 +142,7 @@  int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs)
 		goto err_out;
 	}
 	/* get bndregs field from current task's xsave area */
-	bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS);
+	bndregs = get_xsave_field_ptr(XFEATURE_BNDREGS);
 	if (!bndregs) {
 		err = -EINVAL;
 		goto err_out;
@@ -190,7 +190,7 @@  static __user void *mpx_get_bounds_dir(void)
 	 * The bounds directory pointer is stored in a register
 	 * only accessible if we first do an xsave.
 	 */
-	bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
+	bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
 	if (!bndcsr)
 		return MPX_INVALID_BOUNDS_DIR;
 
@@ -376,7 +376,7 @@  static int do_mpx_bt_fault(void)
 	const struct mpx_bndcsr *bndcsr;
 	struct mm_struct *mm = current->mm;
 
-	bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
+	bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
 	if (!bndcsr)
 		return -EINVAL;
 	/*