Message ID | 20190613161656.20765-6-julien.grall@arm.com (mailing list archive) |
---|---|
State | RFC |
Headers | show |
Series | arm64/sve: First steps towards optimizing syscalls | expand |
On Thu, Jun 13, 2019 at 05:16:53PM +0100, Julien Grall wrote: > Introduce a new helper that will zero all SVE registers but the first > 128-bits of each vector. Maybe mention that the helper will be used by a subsequent patch. For now, it's dead code. Maybe also mention briefly what this will be used for: i.e., to avoid the costly store/munge/reload sequences currently used by things like do_sve_acc(). > Signed-off-by: Julien Grall <julien.grall@arm.com> > > --- > Changes in v2: > - Fix typo in the commit title > --- > arch/arm64/include/asm/fpsimd.h | 3 +++ > arch/arm64/include/asm/fpsimdmacros.h | 19 +++++++++++++++++++ > arch/arm64/kernel/entry-fpsimd.S | 7 +++++++ > 3 files changed, 29 insertions(+) > > diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h > index df62bbd33a9a..fda3544c9606 100644 > --- a/arch/arm64/include/asm/fpsimd.h > +++ b/arch/arm64/include/asm/fpsimd.h > @@ -83,6 +83,9 @@ static inline void *sve_pffr(struct thread_struct *thread) > extern void sve_save_state(void *state, u32 *pfpsr); > extern void sve_load_state(void const *state, u32 const *pfpsr, > unsigned long vq_minus_1); > + > +extern void sve_flush_live(void); > + We probably don't need the extra blank lines here... these operations are all closely related low-level backend functions. > extern unsigned int sve_get_vl(void); > > struct arm64_cpu_capabilities; > diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h > index 5e291d9c1ba0..a41ab337bf42 100644 > --- a/arch/arm64/include/asm/fpsimdmacros.h > +++ b/arch/arm64/include/asm/fpsimdmacros.h > @@ -175,6 +175,13 @@ > | ((\np) << 5) > .endm > > +/* PFALSE P\np.B */ > +.macro _sve_pfalse np > + _sve_check_preg \np > + .inst 0x2518e400 \ > + | (\np) > +.endm > + > .macro __for from:req, to:req > .if (\from) == (\to) > _for__body %\from > @@ -209,6 +216,18 @@ > 921: > .endm > > +/* Preserve the first 128-bits of Znz and zero the rest. */ > +.macro _sve_flush_z nz > + _sve_check_zreg \nz > + mov v\nz\().16b, v\nz\().16b > +.endm > + > +.macro sve_flush > + _for n, 0, 31, _sve_flush_z \n > + _for n, 0, 15, _sve_pfalse \n > + _sve_wrffr 0 > +.endm > + > .macro sve_save nxbase, xpfpsr, nxtmp > _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 > _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 > diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S > index 12d4958e6429..17121a51c41f 100644 > --- a/arch/arm64/kernel/entry-fpsimd.S > +++ b/arch/arm64/kernel/entry-fpsimd.S > @@ -57,4 +57,11 @@ ENTRY(sve_get_vl) > _sve_rdvl 0, 1 > ret > ENDPROC(sve_get_vl) > + > +/* Zero all SVE registers but the first 128-bits of each vector */ > +ENTRY(sve_flush_live) > + sve_flush > + ret > +ENDPROC(sve_flush_live) > + > #endif /* CONFIG_ARM64_SVE */ The extra blank line makes this more readable, but in the interests of symmetry can you also add a blank after the corresponding #ifdef? [...] With those addressed as appropriate, Reviewed-by: Dave Martin <Dave.Martin@arm.com> Cheers ---Dave
Hi Dave, On 6/21/19 4:33 PM, Dave Martin wrote: > On Thu, Jun 13, 2019 at 05:16:53PM +0100, Julien Grall wrote: >> Introduce a new helper that will zero all SVE registers but the first >> 128-bits of each vector. > > Maybe mention that the helper will be used by a subsequent patch. For > now, it's dead code. > > Maybe also mention briefly what this will be used for: i.e., to avoid > the costly store/munge/reload sequences currently used by things like > do_sve_acc(). How about the following commit message: "Introduce a new helper that will zero all SVE registers but the first 128-bits of each vector. This will be used in subsequent patch to avoid the costly store/munge/reload sequences used in place such as do_sve_acc()." > >> Signed-off-by: Julien Grall <julien.grall@arm.com> >> >> --- >> Changes in v2: >> - Fix typo in the commit title >> --- >> arch/arm64/include/asm/fpsimd.h | 3 +++ >> arch/arm64/include/asm/fpsimdmacros.h | 19 +++++++++++++++++++ >> arch/arm64/kernel/entry-fpsimd.S | 7 +++++++ >> 3 files changed, 29 insertions(+) >> >> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h >> index df62bbd33a9a..fda3544c9606 100644 >> --- a/arch/arm64/include/asm/fpsimd.h >> +++ b/arch/arm64/include/asm/fpsimd.h >> @@ -83,6 +83,9 @@ static inline void *sve_pffr(struct thread_struct *thread) >> extern void sve_save_state(void *state, u32 *pfpsr); >> extern void sve_load_state(void const *state, u32 const *pfpsr, >> unsigned long vq_minus_1); >> + >> +extern void sve_flush_live(void); >> + > > We probably don't need the extra blank lines here... these operations > are all closely related low-level backend functions. Sure. > >> extern unsigned int sve_get_vl(void); >> >> struct arm64_cpu_capabilities; >> diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h >> index 5e291d9c1ba0..a41ab337bf42 100644 >> --- a/arch/arm64/include/asm/fpsimdmacros.h >> +++ b/arch/arm64/include/asm/fpsimdmacros.h >> @@ -175,6 +175,13 @@ >> | ((\np) << 5) >> .endm >> >> +/* PFALSE P\np.B */ >> +.macro _sve_pfalse np >> + _sve_check_preg \np >> + .inst 0x2518e400 \ >> + | (\np) >> +.endm >> + >> .macro __for from:req, to:req >> .if (\from) == (\to) >> _for__body %\from >> @@ -209,6 +216,18 @@ >> 921: >> .endm >> >> +/* Preserve the first 128-bits of Znz and zero the rest. */ >> +.macro _sve_flush_z nz >> + _sve_check_zreg \nz >> + mov v\nz\().16b, v\nz\().16b >> +.endm >> + >> +.macro sve_flush >> + _for n, 0, 31, _sve_flush_z \n >> + _for n, 0, 15, _sve_pfalse \n >> + _sve_wrffr 0 >> +.endm >> + >> .macro sve_save nxbase, xpfpsr, nxtmp >> _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 >> _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 >> diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S >> index 12d4958e6429..17121a51c41f 100644 >> --- a/arch/arm64/kernel/entry-fpsimd.S >> +++ b/arch/arm64/kernel/entry-fpsimd.S >> @@ -57,4 +57,11 @@ ENTRY(sve_get_vl) >> _sve_rdvl 0, 1 >> ret >> ENDPROC(sve_get_vl) >> + >> +/* Zero all SVE registers but the first 128-bits of each vector */ >> +ENTRY(sve_flush_live) >> + sve_flush >> + ret >> +ENDPROC(sve_flush_live) >> + >> #endif /* CONFIG_ARM64_SVE */ > > The extra blank line makes this more readable, but in the interests > of symmetry can you also add a blank after the corresponding #ifdef? I would prefer to do this change in a separate patch. So I will drop the newline here. > > [...] > > With those addressed as appropriate, > > Reviewed-by: Dave Martin <Dave.Martin@arm.com> Thank you! Cheers,
On Mon, Jun 24, 2019 at 05:28:56PM +0100, Julien Grall wrote: > Hi Dave, > > On 6/21/19 4:33 PM, Dave Martin wrote: > >On Thu, Jun 13, 2019 at 05:16:53PM +0100, Julien Grall wrote: > >>Introduce a new helper that will zero all SVE registers but the first > >>128-bits of each vector. > > > >Maybe mention that the helper will be used by a subsequent patch. For > >now, it's dead code. > > > >Maybe also mention briefly what this will be used for: i.e., to avoid > >the costly store/munge/reload sequences currently used by things like > >do_sve_acc(). > > How about the following commit message: > > "Introduce a new helper that will zero all SVE registers but the first > 128-bits of each vector. This will be used in subsequent patch to avoid the > costly store/munge/reload sequences used in place such as do_sve_acc()." Sure, that works. > > > > >>Signed-off-by: Julien Grall <julien.grall@arm.com> > >> > >>--- > >> Changes in v2: > >> - Fix typo in the commit title > >>--- > >> arch/arm64/include/asm/fpsimd.h | 3 +++ > >> arch/arm64/include/asm/fpsimdmacros.h | 19 +++++++++++++++++++ > >> arch/arm64/kernel/entry-fpsimd.S | 7 +++++++ > >> 3 files changed, 29 insertions(+) > >> > >>diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h > >>index df62bbd33a9a..fda3544c9606 100644 > >>--- a/arch/arm64/include/asm/fpsimd.h > >>+++ b/arch/arm64/include/asm/fpsimd.h > >>@@ -83,6 +83,9 @@ static inline void *sve_pffr(struct thread_struct *thread) > >> extern void sve_save_state(void *state, u32 *pfpsr); > >> extern void sve_load_state(void const *state, u32 const *pfpsr, > >> unsigned long vq_minus_1); > >>+ > >>+extern void sve_flush_live(void); > >>+ > > > >We probably don't need the extra blank lines here... these operations > >are all closely related low-level backend functions. > > Sure. [...] > >>diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S > >>index 12d4958e6429..17121a51c41f 100644 > >>--- a/arch/arm64/kernel/entry-fpsimd.S > >>+++ b/arch/arm64/kernel/entry-fpsimd.S > >>@@ -57,4 +57,11 @@ ENTRY(sve_get_vl) > >> _sve_rdvl 0, 1 > >> ret > >> ENDPROC(sve_get_vl) > >>+ > >>+/* Zero all SVE registers but the first 128-bits of each vector */ > >>+ENTRY(sve_flush_live) > >>+ sve_flush > >>+ ret > >>+ENDPROC(sve_flush_live) > >>+ > >> #endif /* CONFIG_ARM64_SVE */ > > > >The extra blank line makes this more readable, but in the interests > >of symmetry can you also add a blank after the corresponding #ifdef? > > I would prefer to do this change in a separate patch. So I will drop the > newline here. OK, sounds reasonable. > > > > >[...] > > > >With those addressed as appropriate, > > > >Reviewed-by: Dave Martin <Dave.Martin@arm.com> Feel free to keep this tag. Cheers ---Dave
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index df62bbd33a9a..fda3544c9606 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -83,6 +83,9 @@ static inline void *sve_pffr(struct thread_struct *thread) extern void sve_save_state(void *state, u32 *pfpsr); extern void sve_load_state(void const *state, u32 const *pfpsr, unsigned long vq_minus_1); + +extern void sve_flush_live(void); + extern unsigned int sve_get_vl(void); struct arm64_cpu_capabilities; diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 5e291d9c1ba0..a41ab337bf42 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -175,6 +175,13 @@ | ((\np) << 5) .endm +/* PFALSE P\np.B */ +.macro _sve_pfalse np + _sve_check_preg \np + .inst 0x2518e400 \ + | (\np) +.endm + .macro __for from:req, to:req .if (\from) == (\to) _for__body %\from @@ -209,6 +216,18 @@ 921: .endm +/* Preserve the first 128-bits of Znz and zero the rest. */ +.macro _sve_flush_z nz + _sve_check_zreg \nz + mov v\nz\().16b, v\nz\().16b +.endm + +.macro sve_flush + _for n, 0, 31, _sve_flush_z \n + _for n, 0, 15, _sve_pfalse \n + _sve_wrffr 0 +.endm + .macro sve_save nxbase, xpfpsr, nxtmp _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34 _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16 diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 12d4958e6429..17121a51c41f 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -57,4 +57,11 @@ ENTRY(sve_get_vl) _sve_rdvl 0, 1 ret ENDPROC(sve_get_vl) + +/* Zero all SVE registers but the first 128-bits of each vector */ +ENTRY(sve_flush_live) + sve_flush + ret +ENDPROC(sve_flush_live) + #endif /* CONFIG_ARM64_SVE */
Introduce a new helper that will zero all SVE registers but the first 128-bits of each vector. Signed-off-by: Julien Grall <julien.grall@arm.com> --- Changes in v2: - Fix typo in the commit title --- arch/arm64/include/asm/fpsimd.h | 3 +++ arch/arm64/include/asm/fpsimdmacros.h | 19 +++++++++++++++++++ arch/arm64/kernel/entry-fpsimd.S | 7 +++++++ 3 files changed, 29 insertions(+)