diff mbox series

[-next,v13,10/19] riscv: Allocate user's vector context in the first-use trap

Message ID 20230125142056.18356-11-andy.chiu@sifive.com (mailing list archive)
State New, archived
Headers show
Series riscv: Add vector ISA support | expand

Commit Message

Andy Chiu Jan. 25, 2023, 2:20 p.m. UTC
Vector unit is disabled by default for all user processes. Thus, a
process will take a trap (illegal instruction) into kernel at the first
time when it uses Vector. Only after then, the kernel allocates V
context and starts take care of the context for that user process.

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Link: https://lore.kernel.org/r/3923eeee-e4dc-0911-40bf-84c34aee962d@linaro.org
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
---
 arch/riscv/include/asm/insn.h   | 24 +++++++++
 arch/riscv/include/asm/vector.h |  2 +
 arch/riscv/kernel/Makefile      |  1 +
 arch/riscv/kernel/vector.c      | 89 +++++++++++++++++++++++++++++++++
 4 files changed, 116 insertions(+)
 create mode 100644 arch/riscv/kernel/vector.c

Comments

Conor Dooley Jan. 26, 2023, 11:11 p.m. UTC | #1
Hey Andy!

On Wed, Jan 25, 2023 at 02:20:47PM +0000, Andy Chiu wrote:
> Vector unit is disabled by default for all user processes. Thus, a
> process will take a trap (illegal instruction) into kernel at the first
> time when it uses Vector. Only after then, the kernel allocates V
> context and starts take care of the context for that user process.

I'm mostly ambivalent about the methods you lot discussed for turning v
on when needed, so this WFM :)

> Suggested-by: Richard Henderson <richard.henderson@linaro.org>
> Link: https://lore.kernel.org/r/3923eeee-e4dc-0911-40bf-84c34aee962d@linaro.org
> Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
> ---
>  arch/riscv/include/asm/insn.h   | 24 +++++++++
>  arch/riscv/include/asm/vector.h |  2 +
>  arch/riscv/kernel/Makefile      |  1 +
>  arch/riscv/kernel/vector.c      | 89 +++++++++++++++++++++++++++++++++
>  4 files changed, 116 insertions(+)
>  create mode 100644 arch/riscv/kernel/vector.c
> 
> diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h
> index 25ef9c0b19e7..b1ef3617881f 100644
> --- a/arch/riscv/include/asm/insn.h
> +++ b/arch/riscv/include/asm/insn.h
> @@ -133,6 +133,24 @@
>  #define RVG_OPCODE_JALR		0x67
>  #define RVG_OPCODE_JAL		0x6f
>  #define RVG_OPCODE_SYSTEM	0x73
> +#define RVG_SYSTEM_CSR_OFF	20
> +#define RVG_SYSTEM_CSR_MASK	GENMASK(12, 0)

These ones look good.

> +
> +/* parts of opcode for RVV */
> +#define OPCODE_VECTOR		0x57
> +#define LSFP_WIDTH_RVV_8	0
> +#define LSFP_WIDTH_RVV_16	5
> +#define LSFP_WIDTH_RVV_32	6
> +#define LSFP_WIDTH_RVV_64	7

All of this needs a prefix though, not the almost-postfix you've added.
IOW, move the RVV to the start.

> +
> +/* parts of opcode for RVF, RVD and RVQ */
> +#define LSFP_WIDTH_OFF		12
> +#define LSFP_WIDTH_MASK		GENMASK(3, 0)

These all get an RVG_ prefix, no? Or does the Q prevent that? Either
way, they do need a prefix.

> +#define LSFP_WIDTH_FP_W		2
> +#define LSFP_WIDTH_FP_D		3
> +#define LSFP_WIDTH_FP_Q		4

LSFP isn't something that has hits in the spec, which is annoying for
cross checking IMO. If it were me, I'd likely do something like
RVG_FLW_FSW_WIDTH since then it is abundantly clear what this is the
width of.

> +#define OPCODE_LOADFP		0x07
> +#define OPCODE_STOREFP		0x27

Same comment about prefix here. I'd be tempted to make these names match
the spec too, but it is clear enough to me what this are at the moment.

> +#define EXTRACT_LOAD_STORE_FP_WIDTH(x) \
> +#define EXTRACT_SYSTEM_CSR(x) \

Prefixes again here please!

> +
>  /*
>   * Get the immediate from a J-type instruction.
>   *
> diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
> index f8a9e37c4374..7c77696d704a 100644
> --- a/arch/riscv/include/asm/vector.h
> +++ b/arch/riscv/include/asm/vector.h
> @@ -19,6 +19,7 @@
>  #define CSR_STR(x) __ASM_STR(x)
>  
>  extern unsigned long riscv_vsize;
> +bool rvv_first_use_handler(struct pt_regs *regs);

Please rename to riscv_v_...

> +static bool insn_is_vector(u32 insn_buf)
> +{
> +	u32 opcode = insn_buf & __INSN_OPCODE_MASK;

Newline here please...

> +	/*
> +	 * All V-related instructions, including CSR operations are 4-Byte. So,
> +	 * do not handle if the instruction length is not 4-Byte.
> +	 */
> +	if (unlikely(GET_INSN_LENGTH(insn_buf) != 4))
> +		return false;

...and one here please too!

> +	if (opcode == OPCODE_VECTOR) {
> +		return true;
> +	}

	if (opcode == OPCODE_LOADFP || opcode == OPCODE_STOREFP) {
The above returns, so there's no need for the else

> +		u32 width = EXTRACT_LOAD_STORE_FP_WIDTH(insn_buf);
> +
> +		if (width == LSFP_WIDTH_RVV_8 || width == LSFP_WIDTH_RVV_16 ||
> +		    width == LSFP_WIDTH_RVV_32 || width == LSFP_WIDTH_RVV_64)
> +			return true;

I suppose you could also add else return false, thereby dropping the
else in the line below too, but that's a matter of preference :)

> +	} else if (opcode == RVG_OPCODE_SYSTEM) {
> +		u32 csr = EXTRACT_SYSTEM_CSR(insn_buf);
> +
> +		if ((csr >= CSR_VSTART && csr <= CSR_VCSR) ||
> +		    (csr >= CSR_VL && csr <= CSR_VLENB))
> +			return true;
> +	}
> +	return false;
> +}

I would like Heiko to take a look at this function!
I know we have the RISCV_INSN_FUNCS stuff that got newly added, but that's
for single, named instructions. I'm just curious if there may be a neater
way to go about doing this. AFAICT, the widths are all in funct3 - but it
is a shame that 0b100 is Q and 0 is vector, as the macro works for matches
and we can't use the upper bit for that.
There's prob something you could do with XORing and XNORing bits, but at
that point it'd not be adding any clarity at all & it'd not be a
RISCV_INSN_FUNCS anymore!
The actual opcode checks probably could be extracted though, but would
love to know what Heiko thinks, even if that is "leave it as is".

> +
> +int rvv_thread_zalloc(void)

riscv_v_... and so on down the file

> +{
> +	void *datap;
> +
> +	datap = kzalloc(riscv_vsize, GFP_KERNEL);
> +	if (!datap)
> +		return -ENOMEM;
> +	current->thread.vstate.datap = datap;
> +	memset(&current->thread.vstate, 0, offsetof(struct __riscv_v_state,
> +						    datap));
> +	return 0;
> +}
> +
> +bool rvv_first_use_handler(struct pt_regs *regs)
> +{
> +	__user u32 *epc = (u32 *)regs->epc;
> +	u32 tval = (u32)regs->badaddr;

I'm dumb, what's the t here? This variable holds an instruction, right?
Why not call it `insn` so it conveys some meaning?

> +	/* If V has been enabled then it is not the first-use trap */
> +	if (vstate_query(regs))
> +		return false;
> +	/* Get the instruction */
> +	if (!tval) {
> +		if (__get_user(tval, epc))
> +			return false;
> +	}
> +	/* Filter out non-V instructions */
> +	if (!insn_is_vector(tval))
> +		return false;
> +	/* Sanity check. datap should be null by the time of the first-use trap */
> +	WARN_ON(current->thread.vstate.datap);

Is a WARN_ON sufficient here? If on the first use trap, it's non-null
should we return false and trigger the trap error too?

> +	/*
> +	 * Now we sure that this is a V instruction. And it executes in the
> +	 * context where VS has been off. So, try to allocate the user's V
> +	 * context and resume execution.
> +	 */
> +	if (rvv_thread_zalloc()) {
> +		force_sig(SIGKILL);
> +		return true;
> +	}
> +	vstate_on(regs);
> +	return true;

Otherwise this looks sane to me!

Thanks,
Conor.
Andy Chiu Feb. 6, 2023, noon UTC | #2
On Fri, Jan 27, 2023 at 7:11 AM Conor Dooley <conor@kernel.org> wrote:
> > +
> > +/* parts of opcode for RVV */
> > +#define OPCODE_VECTOR                0x57
> > +#define LSFP_WIDTH_RVV_8     0
> > +#define LSFP_WIDTH_RVV_16    5
> > +#define LSFP_WIDTH_RVV_32    6
> > +#define LSFP_WIDTH_RVV_64    7
>
> All of this needs a prefix though, not the almost-postfix you've added.
> IOW, move the RVV to the start.
Thanks for the note. Changing to RVV_VL_VS_WIDTH_*
>
> > +
> > +/* parts of opcode for RVF, RVD and RVQ */
> > +#define LSFP_WIDTH_OFF               12
> > +#define LSFP_WIDTH_MASK              GENMASK(3, 0)
>
> These all get an RVG_ prefix, no? Or does the Q prevent that? Either
> way, they do need a prefix.
>
> > +#define LSFP_WIDTH_FP_W              2
> > +#define LSFP_WIDTH_FP_D              3
> > +#define LSFP_WIDTH_FP_Q              4
>
> LSFP isn't something that has hits in the spec, which is annoying for
> cross checking IMO. If it were me, I'd likely do something like
> RVG_FLW_FSW_WIDTH since then it is abundantly clear what this is the
> width of.
Ok, s/LSFP_WIDTH_/RVFDQ_FL_FS_WIDTH_/
>
> > +#define OPCODE_LOADFP                0x07
> > +#define OPCODE_STOREFP               0x27
>
> Same comment about prefix here. I'd be tempted to make these names match
> the spec too, but it is clear enough to me what this are at the moment.
>
These will be changed to RVFDQ_OPCODE_{FL|FS} In the next revision.
> > +#define EXTRACT_LOAD_STORE_FP_WIDTH(x) \
> > +#define EXTRACT_SYSTEM_CSR(x) \
>
> Prefixes again here please!
Adding RVG prefix and changing to RVFDQ_EXRACT_FL_FS_WIDTH
> > +     if (opcode == OPCODE_VECTOR) {
> > +             return true;
> > +     }
>
>        if (opcode == OPCODE_LOADFP || opcode == OPCODE_STOREFP) {
> The above returns, so there's no need for the else
>
> > +             u32 width = EXTRACT_LOAD_STORE_FP_WIDTH(insn_buf);
> > +
> > +             if (width == LSFP_WIDTH_RVV_8 || width == LSFP_WIDTH_RVV_16 ||
> > +                 width == LSFP_WIDTH_RVV_32 || width == LSFP_WIDTH_RVV_64)
> > +                     return true;
>
> I suppose you could also add else return false, thereby dropping the
> else in the line below too, but that's a matter of preference :)
>
> > +     } else if (opcode == RVG_OPCODE_SYSTEM) {
> > +             u32 csr = EXTRACT_SYSTEM_CSR(insn_buf);
> > +
> > +             if ((csr >= CSR_VSTART && csr <= CSR_VCSR) ||
> > +                 (csr >= CSR_VL && csr <= CSR_VLENB))
> > +                     return true;
> > +     }
> > +     return false;
> > +}
Changing it to a switch statement for better structuring.
> I would like Heiko to take a look at this function!
> I know we have the RISCV_INSN_FUNCS stuff that got newly added, but that's
> for single, named instructions. I'm just curious if there may be a neater
> way to go about doing this. AFAICT, the widths are all in funct3 - but it
> is a shame that 0b100 is Q and 0 is vector, as the macro works for matches
> and we can't use the upper bit for that.
> There's prob something you could do with XORing and XNORing bits, but at
> that point it'd not be adding any clarity at all & it'd not be a
> RISCV_INSN_FUNCS anymore!
> The actual opcode checks probably could be extracted though, but would
> love to know what Heiko thinks, even if that is "leave it as is".
I've checked the RISCV_INSN_FUNCS part recently. It seems good to
match a single type of instruction, such as vector with OP-V opcode.
However, I did not find an easy way of matching whole instructions
introduced by RVV, which includes CSR operations on multiple CSRs and
load/store with different widths. Yes, it would be great if we could
distinguish VL and VS out by the upper bit of the width. Or even
better if we could match CSR numbers for Vector this way. But I didn't
find it.
>
> > +
> > +int rvv_thread_zalloc(void)
>
> riscv_v_... and so on down the file
>
> > +{
> > +     void *datap;
> > +
> > +     datap = kzalloc(riscv_vsize, GFP_KERNEL);
> > +     if (!datap)
> > +             return -ENOMEM;
> > +     current->thread.vstate.datap = datap;
> > +     memset(&current->thread.vstate, 0, offsetof(struct __riscv_v_state,
> > +                                                 datap));
> > +     return 0;
> > +}
> > +
> > +bool rvv_first_use_handler(struct pt_regs *regs)
> > +{
> > +     __user u32 *epc = (u32 *)regs->epc;
> > +     u32 tval = (u32)regs->badaddr;
>
> I'm dumb, what's the t here? This variable holds an instruction, right?
> Why not call it `insn` so it conveys some meaning?
tval is the trap value register. I think it is the same as badaddr but
you're right. `insn` has a better meaning here.
>
> > +     /* If V has been enabled then it is not the first-use trap */
> > +     if (vstate_query(regs))
> > +             return false;
> > +     /* Get the instruction */
> > +     if (!tval) {
> > +             if (__get_user(tval, epc))
> > +                     return false;
> > +     }
> > +     /* Filter out non-V instructions */
> > +     if (!insn_is_vector(tval))
> > +             return false;
> > +     /* Sanity check. datap should be null by the time of the first-use trap */
> > +     WARN_ON(current->thread.vstate.datap);
>
> Is a WARN_ON sufficient here? If on the first use trap, it's non-null
> should we return false and trigger the trap error too?
If we'd run into this warning message then there is a bug in kernel
space. For example, if we did not properly free and clear the datap
pointer. Or if we allocated datap somewhere else and did not set VS
accordingly. Normally, current user space programs would not expect to
run into this point, so I guess returning false here is not
meaningful. This warning message is intended for kernel debugging
only. Or, should we just strip out this check?
>
> > +     /*
> > +      * Now we sure that this is a V instruction. And it executes in the
> > +      * context where VS has been off. So, try to allocate the user's V
> > +      * context and resume execution.
> > +      */
> > +     if (rvv_thread_zalloc()) {
> > +             force_sig(SIGKILL);
> > +             return true;
> > +     }
> > +     vstate_on(regs);
> > +     return true;
>
> Otherwise this looks sane to me!
>
> Thanks,
> Conor.
>
Thanks,
Andy.
Conor Dooley Feb. 6, 2023, 1:40 p.m. UTC | #3
On 6 February 2023 13:00:00 GMT+01:00, Andy Chiu <andy.chiu@sifive.com> wrote:
>On Fri, Jan 27, 2023 at 7:11 AM Conor Dooley <conor@kernel.org> wrote:

>Changing it to a switch statement for better structuring.
>> I would like Heiko to take a look at this function!
>> I know we have the RISCV_INSN_FUNCS stuff that got newly added, but that's
>> for single, named instructions. I'm just curious if there may be a neater
>> way to go about doing this. AFAICT, the widths are all in funct3 - but it
>> is a shame that 0b100 is Q and 0 is vector, as the macro works for matches
>> and we can't use the upper bit for that.
>> There's prob something you could do with XORing and XNORing bits, but at
>> that point it'd not be adding any clarity at all & it'd not be a
>> RISCV_INSN_FUNCS anymore!
>> The actual opcode checks probably could be extracted though, but would
>> love to know what Heiko thinks, even if that is "leave it as is".
>I've checked the RISCV_INSN_FUNCS part recently. It seems good to
>match a single type of instruction, such as vector with OP-V opcode.
>However, I did not find an easy way of matching whole instructions
>introduced by RVV, which includes CSR operations on multiple CSRs and
>load/store with different widths. Yes, it would be great if we could
>distinguish VL and VS out by the upper bit of the width. Or even
>better if we could match CSR numbers for Vector this way. But I didn't
>find it.

Yup, I didn't see a straight forward way either.
I was hoping Heiko might have an idea!


>> > +     /* Sanity check. datap should be null by the time of the first-use trap */
>> > +     WARN_ON(current->thread.vstate.datap);
>>
>> Is a WARN_ON sufficient here? If on the first use trap, it's non-null
>> should we return false and trigger the trap error too?
>If we'd run into this warning message then there is a bug in kernel
>space. For example, if we did not properly free and clear the datap
>pointer. Or if we allocated datap somewhere else and did not set VS
>accordingly. Normally, current user space programs would not expect to
>run into this point, so I guess returning false here is not
>meaningful. This warning message is intended for kernel debugging
>only. Or, should we just strip out this check?

I suppose my question was "is it safe to warn and carry on, rather than disallow use of vector in this situation".

Thanks,
Conor.
Björn Töpel Feb. 7, 2023, 2:36 p.m. UTC | #4
Andy,

(Keeping the huge Cc:-list for now...)

Andy Chiu <andy.chiu@sifive.com> writes:

> diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
> new file mode 100644
> index 000000000000..cdd58d1c8b3c
> --- /dev/null
> +++ b/arch/riscv/kernel/vector.c
> @@ -0,0 +1,89 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2023 SiFive
> + * Author: Andy Chiu <andy.chiu@sifive.com>
> + */
> +#include <linux/sched/signal.h>
> +#include <linux/types.h>
> +#include <linux/slab.h>
> +#include <linux/sched.h>
> +#include <linux/uaccess.h>
> +
> +#include <asm/thread_info.h>
> +#include <asm/processor.h>
> +#include <asm/insn.h>
> +#include <asm/vector.h>
> +#include <asm/ptrace.h>
> +#include <asm/bug.h>
> +
> +static bool insn_is_vector(u32 insn_buf)
> +{
> +	u32 opcode = insn_buf & __INSN_OPCODE_MASK;
> +	/*
> +	 * All V-related instructions, including CSR operations are 4-Byte. So,
> +	 * do not handle if the instruction length is not 4-Byte.
> +	 */
> +	if (unlikely(GET_INSN_LENGTH(insn_buf) != 4))
> +		return false;
> +	if (opcode == OPCODE_VECTOR) {
> +		return true;
> +	} else if (opcode == OPCODE_LOADFP || opcode == OPCODE_STOREFP) {
> +		u32 width = EXTRACT_LOAD_STORE_FP_WIDTH(insn_buf);
> +
> +		if (width == LSFP_WIDTH_RVV_8 || width == LSFP_WIDTH_RVV_16 ||
> +		    width == LSFP_WIDTH_RVV_32 || width == LSFP_WIDTH_RVV_64)
> +			return true;
> +	} else if (opcode == RVG_OPCODE_SYSTEM) {
> +		u32 csr = EXTRACT_SYSTEM_CSR(insn_buf);
> +
> +		if ((csr >= CSR_VSTART && csr <= CSR_VCSR) ||
> +		    (csr >= CSR_VL && csr <= CSR_VLENB))
> +			return true;
> +	}
> +	return false;
> +}
> +
> +int rvv_thread_zalloc(void)
> +{
> +	void *datap;
> +
> +	datap = kzalloc(riscv_vsize, GFP_KERNEL);
> +	if (!datap)
> +		return -ENOMEM;
> +	current->thread.vstate.datap = datap;
> +	memset(&current->thread.vstate, 0, offsetof(struct __riscv_v_state,
> +						    datap));
> +	return 0;
> +}
> +
> +bool rvv_first_use_handler(struct pt_regs *regs)
> +{
> +	__user u32 *epc = (u32 *)regs->epc;
> +	u32 tval = (u32)regs->badaddr;
> +
> +	/* If V has been enabled then it is not the first-use trap */
> +	if (vstate_query(regs))
> +		return false;
> +	/* Get the instruction */
> +	if (!tval) {
> +		if (__get_user(tval, epc))
> +			return false;
> +	}
> +	/* Filter out non-V instructions */
> +	if (!insn_is_vector(tval))
> +		return false;
> +	/* Sanity check. datap should be null by the time of the first-use trap */
> +	WARN_ON(current->thread.vstate.datap);
> +	/*
> +	 * Now we sure that this is a V instruction. And it executes in the
> +	 * context where VS has been off. So, try to allocate the user's V
> +	 * context and resume execution.
> +	 */
> +	if (rvv_thread_zalloc()) {
> +		force_sig(SIGKILL);
> +		return true;
> +	}

Should the altstack size be taken into consideration, like x86 does in
validate_sigaltstack() (see __xstate_request_perm()).

Related; Would it make sense to implement sigaltstack_size_valid() for
riscv, analogous to x86?


Björn
Vineet Gupta Feb. 7, 2023, 9:18 p.m. UTC | #5
Hi Andy,

On 1/25/23 06:20, Andy Chiu wrote:
> +static bool insn_is_vector(u32 insn_buf)
> +{
> +	u32 opcode = insn_buf & __INSN_OPCODE_MASK;
> +	/*
> +	 * All V-related instructions, including CSR operations are 4-Byte. So,
> +	 * do not handle if the instruction length is not 4-Byte.
> +	 */
> +	if (unlikely(GET_INSN_LENGTH(insn_buf) != 4))
> +		return false;
> +	if (opcode == OPCODE_VECTOR) {
> +		return true;
> +	} else if (opcode == OPCODE_LOADFP || opcode == OPCODE_STOREFP) {
> +		u32 width = EXTRACT_LOAD_STORE_FP_WIDTH(insn_buf);
> +
> +		if (width == LSFP_WIDTH_RVV_8 || width == LSFP_WIDTH_RVV_16 ||
> +		    width == LSFP_WIDTH_RVV_32 || width == LSFP_WIDTH_RVV_64)
> +			return true;

What is the purpose of checking FP opcodes here ?

> +	} else if (opcode == RVG_OPCODE_SYSTEM) {
> +		u32 csr = EXTRACT_SYSTEM_CSR(insn_buf);
> +
> +		if ((csr >= CSR_VSTART && csr <= CSR_VCSR) ||
> +		    (csr >= CSR_VL && csr <= CSR_VLENB))
> +			return true;
> +	}
> +	return false;
> +}
Björn Töpel Feb. 8, 2023, 9:20 a.m. UTC | #6
Vineet Gupta <vineetg@rivosinc.com> writes:

> Hi Andy,
>
> On 1/25/23 06:20, Andy Chiu wrote:
>> +static bool insn_is_vector(u32 insn_buf)
>> +{
>> +	u32 opcode = insn_buf & __INSN_OPCODE_MASK;
>> +	/*
>> +	 * All V-related instructions, including CSR operations are 4-Byte. So,
>> +	 * do not handle if the instruction length is not 4-Byte.
>> +	 */
>> +	if (unlikely(GET_INSN_LENGTH(insn_buf) != 4))
>> +		return false;
>> +	if (opcode == OPCODE_VECTOR) {
>> +		return true;
>> +	} else if (opcode == OPCODE_LOADFP || opcode == OPCODE_STOREFP) {
>> +		u32 width = EXTRACT_LOAD_STORE_FP_WIDTH(insn_buf);
>> +
>> +		if (width == LSFP_WIDTH_RVV_8 || width == LSFP_WIDTH_RVV_16 ||
>> +		    width == LSFP_WIDTH_RVV_32 || width == LSFP_WIDTH_RVV_64)
>> +			return true;
>
> What is the purpose of checking FP opcodes here ?

From [1]: "The instructions in the vector extension fit under two
existing major opcodes (LOAD-FP and STORE-FP) and one new major opcode
(OP-V)."

[2] highlights the width encoding.

(And Zvamo is out from the spec, which used AMO,0x2f)

[1] https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#5-vector-instruction-formats
[2] https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc#73-vector-loadstore-width-encoding
Andy Chiu Feb. 10, 2023, noon UTC | #7
On Mon, Feb 6, 2023 at 9:40 PM Conor Dooley <conor@kernel.org> wrote:
> I suppose my question was "is it safe to warn and carry on, rather than disallow use of vector in this situation".
Yes, I think it is safe to warn and carry on. This is a check for
memory leak if future code did not allocate/free datap correctly.

Thanks,
Andy
Vineet Gupta Feb. 13, 2023, 10:54 p.m. UTC | #8
On 2/7/23 06:36, Björn Töpel wrote:
>> +bool rvv_first_use_handler(struct pt_regs *regs)
>> +{
>> +	__user u32 *epc = (u32 *)regs->epc;
>> +	u32 tval = (u32)regs->badaddr;
>> +
>> +	/* If V has been enabled then it is not the first-use trap */
>> +	if (vstate_query(regs))
>> +		return false;
>> +	/* Get the instruction */
>> +	if (!tval) {
>> +		if (__get_user(tval, epc))
>> +			return false;
>> +	}
>> +	/* Filter out non-V instructions */
>> +	if (!insn_is_vector(tval))
>> +		return false;
>> +	/* Sanity check. datap should be null by the time of the first-use trap */
>> +	WARN_ON(current->thread.vstate.datap);
>> +	/*
>> +	 * Now we sure that this is a V instruction. And it executes in the
>> +	 * context where VS has been off. So, try to allocate the user's V
>> +	 * context and resume execution.
>> +	 */
>> +	if (rvv_thread_zalloc()) {
>> +		force_sig(SIGKILL);
>> +		return true;
>> +	}
> Should the altstack size be taken into consideration, like x86 does in
> validate_sigaltstack() (see __xstate_request_perm()).

For a preexisting alternate stack ? Otherwise there is no 
"configuration" like x86 to cross-check against and V fault implies 
large'ish signal stack.
See below as well.

> Related; Would it make sense to implement sigaltstack_size_valid() for
> riscv, analogous to x86?

Indeed we need to do that for the case where alt stack is being setup, 
*after* V fault-on-first use.
But how to handle an existing alt stack which might not be big enough to 
handle V state ?

-Vineet
Björn Töpel Feb. 14, 2023, 6:43 a.m. UTC | #9
Vineet Gupta <vineetg@rivosinc.com> writes:

> On 2/7/23 06:36, Björn Töpel wrote:
>>> +bool rvv_first_use_handler(struct pt_regs *regs)
>>> +{
>>> +	__user u32 *epc = (u32 *)regs->epc;
>>> +	u32 tval = (u32)regs->badaddr;
>>> +
>>> +	/* If V has been enabled then it is not the first-use trap */
>>> +	if (vstate_query(regs))
>>> +		return false;
>>> +	/* Get the instruction */
>>> +	if (!tval) {
>>> +		if (__get_user(tval, epc))
>>> +			return false;
>>> +	}
>>> +	/* Filter out non-V instructions */
>>> +	if (!insn_is_vector(tval))
>>> +		return false;
>>> +	/* Sanity check. datap should be null by the time of the first-use trap */
>>> +	WARN_ON(current->thread.vstate.datap);
>>> +	/*
>>> +	 * Now we sure that this is a V instruction. And it executes in the
>>> +	 * context where VS has been off. So, try to allocate the user's V
>>> +	 * context and resume execution.
>>> +	 */
>>> +	if (rvv_thread_zalloc()) {
>>> +		force_sig(SIGKILL);
>>> +		return true;
>>> +	}
>> Should the altstack size be taken into consideration, like x86 does in
>> validate_sigaltstack() (see __xstate_request_perm()).
>
> For a preexisting alternate stack ?

Yes.

> Otherwise there is no 
> "configuration" like x86 to cross-check against and V fault implies 
> large'ish signal stack.
> See below as well.
>
>> Related; Would it make sense to implement sigaltstack_size_valid() for
>> riscv, analogous to x86?
>
> Indeed we need to do that for the case where alt stack is being setup, 
> *after* V fault-on-first use.
> But how to handle an existing alt stack which might not be big enough to 
> handle V state ?

What I'm getting at is a stricter check at the time of fault
(SIGILL/enable V) handling. If the *existing* altstack is not big
enough, kill the process -- similar to the rvv_thread_zalloc() handling
above.

So, two changes:

1. Disallow V-enablement if the existing altstack does not fit a V-sized
   frame.
2. Sanitize altstack changes when V is enabled.

Other than the altstack handling, I think the series is a good state! It
would great if we could see a v14 land in -next...


Björn
Andy Chiu Feb. 14, 2023, 3:36 p.m. UTC | #10
Hey Björn,

On Tue, Feb 14, 2023 at 2:43 PM Björn Töpel <bjorn@kernel.org> wrote:
> So, two changes:
>
> 1. Disallow V-enablement if the existing altstack does not fit a V-sized
>    frame.
This could potentially break old programs (non-V) that load new system
libraries (with V), If the program sets a small alt stack and takes
the fault in some libraries that use V. However, existing
implementation will also kill the process when the signal arrives,
finding insufficient stack frame in such cases. I'd choose the second
one if we only have these two options, because there is a chance that
the signal handler may not even run.
> 2. Sanitize altstack changes when V is enabled.
Yes, I'd like to have this. But it may be tricky when it comes to
deciding whether V is enabled, due to the first-use trap. If V is
commonly used in system libraries then it is likely that V will be
enabled before an user set an altstack. Sanitizing this case would be
easy and straightforward. But what if the user sets an altstack before
enabling V in the first-use trap? This could happen on a statically
program that has hand-written V routines. This takes us to the 1st
question above, should we fail the user program immediately if the
altstack is set too small?
>
> Other than the altstack handling, I think the series is a good state! It
> would great if we could see a v14 land in -next...
Thanks. I am reforming the v14 patch and hoping the same to happen soon too!

Cheers,
Andy
Björn Töpel Feb. 14, 2023, 4:50 p.m. UTC | #11
Andy Chiu <andy.chiu@sifive.com> writes:

> Hey Björn,
>
> On Tue, Feb 14, 2023 at 2:43 PM Björn Töpel <bjorn@kernel.org> wrote:
>> So, two changes:
>>
>> 1. Disallow V-enablement if the existing altstack does not fit a V-sized
>>    frame.
> This could potentially break old programs (non-V) that load new system
> libraries (with V), If the program sets a small alt stack and takes
> the fault in some libraries that use V. However, existing
> implementation will also kill the process when the signal arrives,
> finding insufficient stack frame in such cases. I'd choose the second
> one if we only have these two options, because there is a chance that
> the signal handler may not even run.

I think we might have different views here. A process has a pre-V, a and
post-V state. Is allowing a process to enter V without the correct
preconditions a good idea? Allow to run with V turned on, but not able
to correctly handle a signal (the stack is too small)?

This was the same argument that the Intel folks had when enabling
AMX. Sure, AMX requires *explicit* enablement, but same rules should
apply, no?

>> 2. Sanitize altstack changes when V is enabled.
> Yes, I'd like to have this. But it may be tricky when it comes to
> deciding whether V is enabled, due to the first-use trap. If V is
> commonly used in system libraries then it is likely that V will be
> enabled before an user set an altstack. Sanitizing this case would be
> easy and straightforward. But what if the user sets an altstack before
> enabling V in the first-use trap? This could happen on a statically
> program that has hand-written V routines. This takes us to the 1st
> question above, should we fail the user program immediately if the
> altstack is set too small?

For me it's obvious to fail (always) "if the altstack is too small to
enable V", because it allows to execute V without proper preconditions.

Personally, I prefer a stricter model. Only enter V if you can, and
after entering it disallow changing the altstack.

Then again, this is *my* opinion and concern. What do other people
think? I don't want to stall the series.

>>
>> Other than the altstack handling, I think the series is a good state! It
>> would great if we could see a v14 land in -next...
> Thanks. I am reforming the v14 patch and hoping the same to happen soon too!

Thank you for your hard work! It would be awesome to *finally* have
vector support in the kernel!


Björn
Vineet Gupta Feb. 14, 2023, 5:24 p.m. UTC | #12
On 2/14/23 08:50, Björn Töpel wrote:
> Andy Chiu <andy.chiu@sifive.com> writes:
>
>> Hey Björn,
>>
>> On Tue, Feb 14, 2023 at 2:43 PM Björn Töpel <bjorn@kernel.org> wrote:
>>> So, two changes:
>>>
>>> 1. Disallow V-enablement if the existing altstack does not fit a V-sized
>>>     frame.
>> This could potentially break old programs (non-V) that load new system
>> libraries (with V), If the program sets a small alt stack and takes
>> the fault in some libraries that use V. However, existing
>> implementation will also kill the process when the signal arrives,
>> finding insufficient stack frame in such cases. I'd choose the second
>> one if we only have these two options, because there is a chance that
>> the signal handler may not even run.
> I think we might have different views here. A process has a pre-V, a and
> post-V state. Is allowing a process to enter V without the correct
> preconditions a good idea? Allow to run with V turned on, but not able
> to correctly handle a signal (the stack is too small)?

The requirement is sane, but the issue is user experience: User trying 
to bring up some V code has no clue that deep in some startup code some 
alt stack had been setup and causing his process to be terminated on 
first V code.

>
> This was the same argument that the Intel folks had when enabling
> AMX. Sure, AMX requires *explicit* enablement, but same rules should
> apply, no?
>
>>> 2. Sanitize altstack changes when V is enabled.
>> Yes, I'd like to have this. But it may be tricky when it comes to
>> deciding whether V is enabled, due to the first-use trap. If V is
>> commonly used in system libraries then it is likely that V will be
>> enabled before an user set an altstack. Sanitizing this case would be
>> easy and straightforward.

Good. Lets have this in v14 as it seems reasonably easy to implement.

>> But what if the user sets an altstack before
>> enabling V in the first-use trap? This could happen on a statically
>> program that has hand-written V routines. This takes us to the 1st
>> question above, should we fail the user program immediately if the
>> altstack is set too small?

Please lets not cross threads. We discussed this already at top. While 
ideally required, seems tricky so lets start with post-V alt stack check.

> For me it's obvious to fail (always) "if the altstack is too small to
> enable V", because it allows to execute V without proper preconditions.
>
> Personally, I prefer a stricter model. Only enter V if you can, and
> after entering it disallow changing the altstack.
>
> Then again, this is *my* opinion and concern. What do other people
> think? I don't want to stall the series.

I concur that the alt stack checking requirements are sensible in the 
long run. We can add the obvious check for post-V case and see if there 
is a sane way to flag pre-V case to.


>
>>> Other than the altstack handling, I think the series is a good state! It
>>> would great if we could see a v14 land in -next...
>> Thanks. I am reforming the v14 patch and hoping the same to happen soon too!
> Thank you for your hard work! It would be awesome to *finally* have
> vector support in the kernel!

Indeed we've come a long way, lets push the gear so we can use the 
coming cycle to flesh out any changes for a possible 6.4 inclusion.

Thx,
-Vineet
Björn Töpel Feb. 15, 2023, 7:14 a.m. UTC | #13
Vineet Gupta <vineetg@rivosinc.com> writes:

> On 2/14/23 08:50, Björn Töpel wrote:
>> Andy Chiu <andy.chiu@sifive.com> writes:
>>
>>> Hey Björn,
>>>
>>> On Tue, Feb 14, 2023 at 2:43 PM Björn Töpel <bjorn@kernel.org> wrote:
>>>> So, two changes:
>>>>
>>>> 1. Disallow V-enablement if the existing altstack does not fit a V-sized
>>>>     frame.
>>> This could potentially break old programs (non-V) that load new system
>>> libraries (with V), If the program sets a small alt stack and takes
>>> the fault in some libraries that use V. However, existing
>>> implementation will also kill the process when the signal arrives,
>>> finding insufficient stack frame in such cases. I'd choose the second
>>> one if we only have these two options, because there is a chance that
>>> the signal handler may not even run.
>> I think we might have different views here. A process has a pre-V, a and
>> post-V state. Is allowing a process to enter V without the correct
>> preconditions a good idea? Allow to run with V turned on, but not able
>> to correctly handle a signal (the stack is too small)?
>
> The requirement is sane, but the issue is user experience: User trying 
> to bring up some V code has no clue that deep in some startup code some 
> alt stack had been setup and causing his process to be terminated on 
> first V code.
>
>>
>> This was the same argument that the Intel folks had when enabling
>> AMX. Sure, AMX requires *explicit* enablement, but same rules should
>> apply, no?
>>
>>>> 2. Sanitize altstack changes when V is enabled.
>>> Yes, I'd like to have this. But it may be tricky when it comes to
>>> deciding whether V is enabled, due to the first-use trap. If V is
>>> commonly used in system libraries then it is likely that V will be
>>> enabled before an user set an altstack. Sanitizing this case would be
>>> easy and straightforward.
>
> Good. Lets have this in v14 as it seems reasonably easy to implement.
>
>>> But what if the user sets an altstack before
>>> enabling V in the first-use trap? This could happen on a statically
>>> program that has hand-written V routines. This takes us to the 1st
>>> question above, should we fail the user program immediately if the
>>> altstack is set too small?
>
> Please lets not cross threads. We discussed this already at top. While 
> ideally required, seems tricky so lets start with post-V alt stack check.
>
>> For me it's obvious to fail (always) "if the altstack is too small to
>> enable V", because it allows to execute V without proper preconditions.
>>
>> Personally, I prefer a stricter model. Only enter V if you can, and
>> after entering it disallow changing the altstack.
>>
>> Then again, this is *my* opinion and concern. What do other people
>> think? I don't want to stall the series.
>
> I concur that the alt stack checking requirements are sensible in the 
> long run. We can add the obvious check for post-V case and see if there 
> is a sane way to flag pre-V case to.

Reasonable. @Andy does this resonate with you as well?


Björn
Andy Chiu Feb. 15, 2023, 2:39 p.m. UTC | #14
On Wed, Feb 15, 2023 at 3:14 PM Björn Töpel <bjorn@kernel.org> wrote:
>
> Vineet Gupta <vineetg@rivosinc.com> writes:
>
> > On 2/14/23 08:50, Björn Töpel wrote:
> >> Andy Chiu <andy.chiu@sifive.com> writes:
> >>
> >>> Hey Björn,
> >>>
> >>> On Tue, Feb 14, 2023 at 2:43 PM Björn Töpel <bjorn@kernel.org> wrote:
> >>>> So, two changes:
> >>>>
> >>>> 1. Disallow V-enablement if the existing altstack does not fit a V-sized
> >>>>     frame.
> >>> This could potentially break old programs (non-V) that load new system
> >>> libraries (with V), If the program sets a small alt stack and takes
> >>> the fault in some libraries that use V. However, existing
> >>> implementation will also kill the process when the signal arrives,
> >>> finding insufficient stack frame in such cases. I'd choose the second
> >>> one if we only have these two options, because there is a chance that
> >>> the signal handler may not even run.
> >> I think we might have different views here. A process has a pre-V, a and
> >> post-V state. Is allowing a process to enter V without the correct
> >> preconditions a good idea? Allow to run with V turned on, but not able
> >> to correctly handle a signal (the stack is too small)?
> >
> > The requirement is sane, but the issue is user experience: User trying
> > to bring up some V code has no clue that deep in some startup code some
> > alt stack had been setup and causing his process to be terminated on
> > first V code.
> >
> >>
> >> This was the same argument that the Intel folks had when enabling
> >> AMX. Sure, AMX requires *explicit* enablement, but same rules should
> >> apply, no?
> >>
> >>>> 2. Sanitize altstack changes when V is enabled.
> >>> Yes, I'd like to have this. But it may be tricky when it comes to
> >>> deciding whether V is enabled, due to the first-use trap. If V is
> >>> commonly used in system libraries then it is likely that V will be
> >>> enabled before an user set an altstack. Sanitizing this case would be
> >>> easy and straightforward.
> >
> > Good. Lets have this in v14 as it seems reasonably easy to implement.
> >
> >>> But what if the user sets an altstack before
> >>> enabling V in the first-use trap? This could happen on a statically
> >>> program that has hand-written V routines. This takes us to the 1st
> >>> question above, should we fail the user program immediately if the
> >>> altstack is set too small?
> >
> > Please lets not cross threads. We discussed this already at top. While
> > ideally required, seems tricky so lets start with post-V alt stack check.
> >
> >> For me it's obvious to fail (always) "if the altstack is too small to
> >> enable V", because it allows to execute V without proper preconditions.
> >>
> >> Personally, I prefer a stricter model. Only enter V if you can, and
> >> after entering it disallow changing the altstack.
> >>
> >> Then again, this is *my* opinion and concern. What do other people
> >> think? I don't want to stall the series.
> >
> > I concur that the alt stack checking requirements are sensible in the
> > long run. We can add the obvious check for post-V case and see if there
> > is a sane way to flag pre-V case to.
>
> Reasonable. @Andy does this resonate with you as well?
Yes, it makes sense to me. I am making this happen on v14 :)

Thanks,
Andy
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h
index 25ef9c0b19e7..b1ef3617881f 100644
--- a/arch/riscv/include/asm/insn.h
+++ b/arch/riscv/include/asm/insn.h
@@ -133,6 +133,24 @@ 
 #define RVG_OPCODE_JALR		0x67
 #define RVG_OPCODE_JAL		0x6f
 #define RVG_OPCODE_SYSTEM	0x73
+#define RVG_SYSTEM_CSR_OFF	20
+#define RVG_SYSTEM_CSR_MASK	GENMASK(12, 0)
+
+/* parts of opcode for RVV */
+#define OPCODE_VECTOR		0x57
+#define LSFP_WIDTH_RVV_8	0
+#define LSFP_WIDTH_RVV_16	5
+#define LSFP_WIDTH_RVV_32	6
+#define LSFP_WIDTH_RVV_64	7
+
+/* parts of opcode for RVF, RVD and RVQ */
+#define LSFP_WIDTH_OFF		12
+#define LSFP_WIDTH_MASK		GENMASK(3, 0)
+#define LSFP_WIDTH_FP_W		2
+#define LSFP_WIDTH_FP_D		3
+#define LSFP_WIDTH_FP_Q		4
+#define OPCODE_LOADFP		0x07
+#define OPCODE_STOREFP		0x27
 
 /* parts of opcode for RVC*/
 #define RVC_OPCODE_C0		0x0
@@ -291,6 +309,12 @@  static __always_inline bool riscv_insn_is_branch(u32 code)
 	(RVC_X(x_, RVC_B_IMM_7_6_OPOFF, RVC_B_IMM_7_6_MASK) << RVC_B_IMM_7_6_OFF) | \
 	(RVC_IMM_SIGN(x_) << RVC_B_IMM_SIGN_OFF); })
 
+#define EXTRACT_LOAD_STORE_FP_WIDTH(x) \
+	({typeof(x) x_ = (x); RV_X(x_, LSFP_WIDTH_OFF, LSFP_WIDTH_MASK); })
+
+#define EXTRACT_SYSTEM_CSR(x) \
+	({typeof(x) x_ = (x); RV_X(x_, RVG_SYSTEM_CSR_OFF, RVG_SYSTEM_CSR_MASK); })
+
 /*
  * Get the immediate from a J-type instruction.
  *
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index f8a9e37c4374..7c77696d704a 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -19,6 +19,7 @@ 
 #define CSR_STR(x) __ASM_STR(x)
 
 extern unsigned long riscv_vsize;
+bool rvv_first_use_handler(struct pt_regs *regs);
 
 static __always_inline bool has_vector(void)
 {
@@ -138,6 +139,7 @@  static inline void vstate_restore(struct task_struct *task,
 struct pt_regs;
 
 static __always_inline bool has_vector(void) { return false; }
+static inline bool rvv_first_use_handler(struct pt_regs *regs) { return false; }
 static inline bool vstate_query(struct pt_regs *regs) { return false; }
 #define riscv_vsize (0)
 #define vstate_save(task, regs)		do {} while (0)
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 4cf303a779ab..48d345a5f326 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -55,6 +55,7 @@  obj-$(CONFIG_MMU) += vdso.o vdso/
 
 obj-$(CONFIG_RISCV_M_MODE)	+= traps_misaligned.o
 obj-$(CONFIG_FPU)		+= fpu.o
+obj-$(CONFIG_RISCV_ISA_V)	+= vector.o
 obj-$(CONFIG_SMP)		+= smpboot.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_SMP)		+= cpu_ops.o
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
new file mode 100644
index 000000000000..cdd58d1c8b3c
--- /dev/null
+++ b/arch/riscv/kernel/vector.c
@@ -0,0 +1,89 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 SiFive
+ * Author: Andy Chiu <andy.chiu@sifive.com>
+ */
+#include <linux/sched/signal.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+
+#include <asm/thread_info.h>
+#include <asm/processor.h>
+#include <asm/insn.h>
+#include <asm/vector.h>
+#include <asm/ptrace.h>
+#include <asm/bug.h>
+
+static bool insn_is_vector(u32 insn_buf)
+{
+	u32 opcode = insn_buf & __INSN_OPCODE_MASK;
+	/*
+	 * All V-related instructions, including CSR operations are 4-Byte. So,
+	 * do not handle if the instruction length is not 4-Byte.
+	 */
+	if (unlikely(GET_INSN_LENGTH(insn_buf) != 4))
+		return false;
+	if (opcode == OPCODE_VECTOR) {
+		return true;
+	} else if (opcode == OPCODE_LOADFP || opcode == OPCODE_STOREFP) {
+		u32 width = EXTRACT_LOAD_STORE_FP_WIDTH(insn_buf);
+
+		if (width == LSFP_WIDTH_RVV_8 || width == LSFP_WIDTH_RVV_16 ||
+		    width == LSFP_WIDTH_RVV_32 || width == LSFP_WIDTH_RVV_64)
+			return true;
+	} else if (opcode == RVG_OPCODE_SYSTEM) {
+		u32 csr = EXTRACT_SYSTEM_CSR(insn_buf);
+
+		if ((csr >= CSR_VSTART && csr <= CSR_VCSR) ||
+		    (csr >= CSR_VL && csr <= CSR_VLENB))
+			return true;
+	}
+	return false;
+}
+
+int rvv_thread_zalloc(void)
+{
+	void *datap;
+
+	datap = kzalloc(riscv_vsize, GFP_KERNEL);
+	if (!datap)
+		return -ENOMEM;
+	current->thread.vstate.datap = datap;
+	memset(&current->thread.vstate, 0, offsetof(struct __riscv_v_state,
+						    datap));
+	return 0;
+}
+
+bool rvv_first_use_handler(struct pt_regs *regs)
+{
+	__user u32 *epc = (u32 *)regs->epc;
+	u32 tval = (u32)regs->badaddr;
+
+	/* If V has been enabled then it is not the first-use trap */
+	if (vstate_query(regs))
+		return false;
+	/* Get the instruction */
+	if (!tval) {
+		if (__get_user(tval, epc))
+			return false;
+	}
+	/* Filter out non-V instructions */
+	if (!insn_is_vector(tval))
+		return false;
+	/* Sanity check. datap should be null by the time of the first-use trap */
+	WARN_ON(current->thread.vstate.datap);
+	/*
+	 * Now we sure that this is a V instruction. And it executes in the
+	 * context where VS has been off. So, try to allocate the user's V
+	 * context and resume execution.
+	 */
+	if (rvv_thread_zalloc()) {
+		force_sig(SIGKILL);
+		return true;
+	}
+	vstate_on(regs);
+	return true;
+}
+