diff mbox series

[RFC,v2,5/8] arm64/sve: Implement a helper to flush SVE registers

Message ID 20190613161656.20765-6-julien.grall@arm.com (mailing list archive)
State RFC
Headers show
Series arm64/sve: First steps towards optimizing syscalls | expand

Commit Message

Julien Grall June 13, 2019, 4:16 p.m. UTC
Introduce a new helper that will zero all SVE registers but the first
128-bits of each vector.

Signed-off-by: Julien Grall <julien.grall@arm.com>

---
    Changes in v2:
        - Fix typo in the commit title
---
 arch/arm64/include/asm/fpsimd.h       |  3 +++
 arch/arm64/include/asm/fpsimdmacros.h | 19 +++++++++++++++++++
 arch/arm64/kernel/entry-fpsimd.S      |  7 +++++++
 3 files changed, 29 insertions(+)

Comments

Dave Martin June 21, 2019, 3:33 p.m. UTC | #1
On Thu, Jun 13, 2019 at 05:16:53PM +0100, Julien Grall wrote:
> Introduce a new helper that will zero all SVE registers but the first
> 128-bits of each vector.

Maybe mention that the helper will be used by a subsequent patch.  For
now, it's dead code.

Maybe also mention briefly what this will be used for: i.e., to avoid
the costly store/munge/reload sequences currently used by things like
do_sve_acc().

> Signed-off-by: Julien Grall <julien.grall@arm.com>
> 
> ---
>     Changes in v2:
>         - Fix typo in the commit title
> ---
>  arch/arm64/include/asm/fpsimd.h       |  3 +++
>  arch/arm64/include/asm/fpsimdmacros.h | 19 +++++++++++++++++++
>  arch/arm64/kernel/entry-fpsimd.S      |  7 +++++++
>  3 files changed, 29 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index df62bbd33a9a..fda3544c9606 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -83,6 +83,9 @@ static inline void *sve_pffr(struct thread_struct *thread)
>  extern void sve_save_state(void *state, u32 *pfpsr);
>  extern void sve_load_state(void const *state, u32 const *pfpsr,
>  			   unsigned long vq_minus_1);
> +
> +extern void sve_flush_live(void);
> +

We probably don't need the extra blank lines here... these operations
are all closely related low-level backend functions.

>  extern unsigned int sve_get_vl(void);
>  
>  struct arm64_cpu_capabilities;
> diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
> index 5e291d9c1ba0..a41ab337bf42 100644
> --- a/arch/arm64/include/asm/fpsimdmacros.h
> +++ b/arch/arm64/include/asm/fpsimdmacros.h
> @@ -175,6 +175,13 @@
>  		| ((\np) << 5)
>  .endm
>  
> +/* PFALSE P\np.B */
> +.macro _sve_pfalse np
> +	_sve_check_preg \np
> +	.inst	0x2518e400			\
> +		| (\np)
> +.endm
> +
>  .macro __for from:req, to:req
>  	.if (\from) == (\to)
>  		_for__body %\from
> @@ -209,6 +216,18 @@
>  921:
>  .endm
>  
> +/* Preserve the first 128-bits of Znz and zero the rest. */
> +.macro _sve_flush_z nz
> +	_sve_check_zreg \nz
> +	mov	v\nz\().16b, v\nz\().16b
> +.endm
> +
> +.macro sve_flush
> + _for n, 0, 31, _sve_flush_z	\n
> + _for n, 0, 15, _sve_pfalse	\n
> +		_sve_wrffr	0
> +.endm
> +
>  .macro sve_save nxbase, xpfpsr, nxtmp
>   _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
>   _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
> diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
> index 12d4958e6429..17121a51c41f 100644
> --- a/arch/arm64/kernel/entry-fpsimd.S
> +++ b/arch/arm64/kernel/entry-fpsimd.S
> @@ -57,4 +57,11 @@ ENTRY(sve_get_vl)
>  	_sve_rdvl	0, 1
>  	ret
>  ENDPROC(sve_get_vl)
> +
> +/* Zero all SVE registers but the first 128-bits of each vector */
> +ENTRY(sve_flush_live)
> +	sve_flush
> +	ret
> +ENDPROC(sve_flush_live)
> +
>  #endif /* CONFIG_ARM64_SVE */

The extra blank line makes this more readable, but in the interests
of symmetry can you also add a blank after the corresponding #ifdef?

[...]

With those addressed as appropriate,

Reviewed-by: Dave Martin <Dave.Martin@arm.com>

Cheers
---Dave
Julien Grall June 24, 2019, 4:28 p.m. UTC | #2
Hi Dave,

On 6/21/19 4:33 PM, Dave Martin wrote:
> On Thu, Jun 13, 2019 at 05:16:53PM +0100, Julien Grall wrote:
>> Introduce a new helper that will zero all SVE registers but the first
>> 128-bits of each vector.
> 
> Maybe mention that the helper will be used by a subsequent patch.  For
> now, it's dead code.
> 
> Maybe also mention briefly what this will be used for: i.e., to avoid
> the costly store/munge/reload sequences currently used by things like
> do_sve_acc().

How about the following commit message:

"Introduce a new helper that will zero all SVE registers but the first 
128 bits of each vector. This will be used in a subsequent patch to avoid 
the costly store/munge/reload sequences used in places such as do_sve_acc()."

> 
>> Signed-off-by: Julien Grall <julien.grall@arm.com>
>>
>> ---
>>      Changes in v2:
>>          - Fix typo in the commit title
>> ---
>>   arch/arm64/include/asm/fpsimd.h       |  3 +++
>>   arch/arm64/include/asm/fpsimdmacros.h | 19 +++++++++++++++++++
>>   arch/arm64/kernel/entry-fpsimd.S      |  7 +++++++
>>   3 files changed, 29 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
>> index df62bbd33a9a..fda3544c9606 100644
>> --- a/arch/arm64/include/asm/fpsimd.h
>> +++ b/arch/arm64/include/asm/fpsimd.h
>> @@ -83,6 +83,9 @@ static inline void *sve_pffr(struct thread_struct *thread)
>>   extern void sve_save_state(void *state, u32 *pfpsr);
>>   extern void sve_load_state(void const *state, u32 const *pfpsr,
>>   			   unsigned long vq_minus_1);
>> +
>> +extern void sve_flush_live(void);
>> +
> 
> We probably don't need the extra blank lines here... these operations
> are all closely related low-level backend functions.

Sure.

> 
>>   extern unsigned int sve_get_vl(void);
>>   
>>   struct arm64_cpu_capabilities;
>> diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
>> index 5e291d9c1ba0..a41ab337bf42 100644
>> --- a/arch/arm64/include/asm/fpsimdmacros.h
>> +++ b/arch/arm64/include/asm/fpsimdmacros.h
>> @@ -175,6 +175,13 @@
>>   		| ((\np) << 5)
>>   .endm
>>   
>> +/* PFALSE P\np.B */
>> +.macro _sve_pfalse np
>> +	_sve_check_preg \np
>> +	.inst	0x2518e400			\
>> +		| (\np)
>> +.endm
>> +
>>   .macro __for from:req, to:req
>>   	.if (\from) == (\to)
>>   		_for__body %\from
>> @@ -209,6 +216,18 @@
>>   921:
>>   .endm
>>   
>> +/* Preserve the first 128-bits of Znz and zero the rest. */
>> +.macro _sve_flush_z nz
>> +	_sve_check_zreg \nz
>> +	mov	v\nz\().16b, v\nz\().16b
>> +.endm
>> +
>> +.macro sve_flush
>> + _for n, 0, 31, _sve_flush_z	\n
>> + _for n, 0, 15, _sve_pfalse	\n
>> +		_sve_wrffr	0
>> +.endm
>> +
>>   .macro sve_save nxbase, xpfpsr, nxtmp
>>    _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
>>    _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
>> diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
>> index 12d4958e6429..17121a51c41f 100644
>> --- a/arch/arm64/kernel/entry-fpsimd.S
>> +++ b/arch/arm64/kernel/entry-fpsimd.S
>> @@ -57,4 +57,11 @@ ENTRY(sve_get_vl)
>>   	_sve_rdvl	0, 1
>>   	ret
>>   ENDPROC(sve_get_vl)
>> +
>> +/* Zero all SVE registers but the first 128-bits of each vector */
>> +ENTRY(sve_flush_live)
>> +	sve_flush
>> +	ret
>> +ENDPROC(sve_flush_live)
>> +
>>   #endif /* CONFIG_ARM64_SVE */
> 
> The extra blank line makes this more readable, but in the interests
> of symmetry can you also add a blank after the corresponding #ifdef?

I would prefer to do this change in a separate patch. So I will drop the 
newline here.

> 
> [...]
> 
> With those addressed as appropriate,
> 
> Reviewed-by: Dave Martin <Dave.Martin@arm.com>

Thank you!

Cheers,
Dave Martin June 25, 2019, 9:37 a.m. UTC | #3
On Mon, Jun 24, 2019 at 05:28:56PM +0100, Julien Grall wrote:
> Hi Dave,
> 
> On 6/21/19 4:33 PM, Dave Martin wrote:
> >On Thu, Jun 13, 2019 at 05:16:53PM +0100, Julien Grall wrote:
> >>Introduce a new helper that will zero all SVE registers but the first
> >>128-bits of each vector.
> >
> >Maybe mention that the helper will be used by a subsequent patch.  For
> >now, it's dead code.
> >
> >Maybe also mention briefly what this will be used for: i.e., to avoid
> >the costly store/munge/reload sequences currently used by things like
> >do_sve_acc().
> 
> How about the following commit message:
> 
> "Introduce a new helper that will zero all SVE registers but the first
> 128 bits of each vector. This will be used in a subsequent patch to avoid the
> costly store/munge/reload sequences used in places such as do_sve_acc()."

Sure, that works.

> 
> >
> >>Signed-off-by: Julien Grall <julien.grall@arm.com>
> >>
> >>---
> >>     Changes in v2:
> >>         - Fix typo in the commit title
> >>---
> >>  arch/arm64/include/asm/fpsimd.h       |  3 +++
> >>  arch/arm64/include/asm/fpsimdmacros.h | 19 +++++++++++++++++++
> >>  arch/arm64/kernel/entry-fpsimd.S      |  7 +++++++
> >>  3 files changed, 29 insertions(+)
> >>
> >>diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> >>index df62bbd33a9a..fda3544c9606 100644
> >>--- a/arch/arm64/include/asm/fpsimd.h
> >>+++ b/arch/arm64/include/asm/fpsimd.h
> >>@@ -83,6 +83,9 @@ static inline void *sve_pffr(struct thread_struct *thread)
> >>  extern void sve_save_state(void *state, u32 *pfpsr);
> >>  extern void sve_load_state(void const *state, u32 const *pfpsr,
> >>  			   unsigned long vq_minus_1);
> >>+
> >>+extern void sve_flush_live(void);
> >>+
> >
> >We probably don't need the extra blank lines here... these operations
> >are all closely related low-level backend functions.
> 
> Sure.

[...]

> >>diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
> >>index 12d4958e6429..17121a51c41f 100644
> >>--- a/arch/arm64/kernel/entry-fpsimd.S
> >>+++ b/arch/arm64/kernel/entry-fpsimd.S
> >>@@ -57,4 +57,11 @@ ENTRY(sve_get_vl)
> >>  	_sve_rdvl	0, 1
> >>  	ret
> >>  ENDPROC(sve_get_vl)
> >>+
> >>+/* Zero all SVE registers but the first 128-bits of each vector */
> >>+ENTRY(sve_flush_live)
> >>+	sve_flush
> >>+	ret
> >>+ENDPROC(sve_flush_live)
> >>+
> >>  #endif /* CONFIG_ARM64_SVE */
> >
> >The extra blank line makes this more readable, but in the interests
> >of symmetry can you also add a blank after the corresponding #ifdef?
> 
> I would prefer to do this change in a separate patch. So I will drop the
> newline here.

OK, sounds reasonable.

> 
> >
> >[...]
> >
> >With those addressed as appropriate,
> >
> >Reviewed-by: Dave Martin <Dave.Martin@arm.com>

Feel free to keep this tag.

Cheers
---Dave
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index df62bbd33a9a..fda3544c9606 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -83,6 +83,9 @@  static inline void *sve_pffr(struct thread_struct *thread)
 extern void sve_save_state(void *state, u32 *pfpsr);
 extern void sve_load_state(void const *state, u32 const *pfpsr,
 			   unsigned long vq_minus_1);
+
+extern void sve_flush_live(void);
+
 extern unsigned int sve_get_vl(void);
 
 struct arm64_cpu_capabilities;
diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h
index 5e291d9c1ba0..a41ab337bf42 100644
--- a/arch/arm64/include/asm/fpsimdmacros.h
+++ b/arch/arm64/include/asm/fpsimdmacros.h
@@ -175,6 +175,13 @@ 
 		| ((\np) << 5)
 .endm
 
+/* PFALSE P\np.B */
+.macro _sve_pfalse np
+	_sve_check_preg \np
+	.inst	0x2518e400			\
+		| (\np)
+.endm
+
 .macro __for from:req, to:req
 	.if (\from) == (\to)
 		_for__body %\from
@@ -209,6 +216,18 @@ 
 921:
 .endm
 
+/* Preserve the first 128-bits of Znz and zero the rest. */
+.macro _sve_flush_z nz
+	_sve_check_zreg \nz
+	mov	v\nz\().16b, v\nz\().16b
+.endm
+
+.macro sve_flush
+ _for n, 0, 31, _sve_flush_z	\n
+ _for n, 0, 15, _sve_pfalse	\n
+		_sve_wrffr	0
+.endm
+
 .macro sve_save nxbase, xpfpsr, nxtmp
  _for n, 0, 31,	_sve_str_v	\n, \nxbase, \n - 34
  _for n, 0, 15,	_sve_str_p	\n, \nxbase, \n - 16
diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S
index 12d4958e6429..17121a51c41f 100644
--- a/arch/arm64/kernel/entry-fpsimd.S
+++ b/arch/arm64/kernel/entry-fpsimd.S
@@ -57,4 +57,11 @@  ENTRY(sve_get_vl)
 	_sve_rdvl	0, 1
 	ret
 ENDPROC(sve_get_vl)
+
+/* Zero all SVE registers but the first 128-bits of each vector */
+ENTRY(sve_flush_live)
+	sve_flush
+	ret
+ENDPROC(sve_flush_live)
+
 #endif /* CONFIG_ARM64_SVE */