diff mbox series

[v2,01/13] riscv: fix jal offsets in patched alternatives

Message ID 20221204174632.3677-2-jszhang@kernel.org (mailing list archive)
State Superseded
Delegated to: Palmer Dabbelt
Headers show
Series riscv: improve boot time isa extensions handling | expand

Checks

Context Check Description
conchuod/tree_selection fail Guessing tree name failed

Commit Message

Jisheng Zhang Dec. 4, 2022, 5:46 p.m. UTC
Alternatives live in a different section, so offsets used by jal
instruction will point to wrong locations after the patch got applied.

Similar to arm64, adjust the location to consider that offset.

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
---
 arch/riscv/include/asm/alternative.h |  2 ++
 arch/riscv/kernel/alternative.c      | 38 ++++++++++++++++++++++++++++
 arch/riscv/kernel/cpufeature.c       |  3 +++
 3 files changed, 43 insertions(+)

Comments

Andrew Jones Dec. 5, 2022, 2:57 p.m. UTC | #1
On Mon, Dec 05, 2022 at 01:46:20AM +0800, Jisheng Zhang wrote:
> Alternatives live in a different section, so offsets used by jal
> instruction will point to wrong locations after the patch got applied.
> 
> Similar to arm64, adjust the location to consider that offset.
> 
> Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> ---
>  arch/riscv/include/asm/alternative.h |  2 ++
>  arch/riscv/kernel/alternative.c      | 38 ++++++++++++++++++++++++++++
>  arch/riscv/kernel/cpufeature.c       |  3 +++
>  3 files changed, 43 insertions(+)
> 
> diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
> index c58ec3cc4bc3..33eae9541684 100644
> --- a/arch/riscv/include/asm/alternative.h
> +++ b/arch/riscv/include/asm/alternative.h
> @@ -29,6 +29,8 @@ void apply_module_alternatives(void *start, size_t length);
>  
>  void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
>  				      int patch_offset);
> +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> +			       int patch_offset);
>  
>  struct alt_entry {
>  	void *old_ptr;		 /* address of original instruciton or data  */
> diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
> index 292cc42dc3be..9d88375624b5 100644
> --- a/arch/riscv/kernel/alternative.c
> +++ b/arch/riscv/kernel/alternative.c
> @@ -125,6 +125,44 @@ void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
>  	}
>  }
>  
> +#define to_jal_imm(value)						\
> +	(((value & (RV_J_IMM_10_1_MASK << RV_J_IMM_10_1_OFF)) << RV_I_IMM_11_0_OPOFF) | \
                                                                 ^ RV_J_IMM_10_1_OPOFF

> +	 ((value & (RV_J_IMM_11_MASK << RV_J_IMM_11_OFF)) << RV_J_IMM_11_OPOFF) | \
> +	 ((value & (RV_J_IMM_19_12_OPOFF << RV_J_IMM_19_12_OFF)) << RV_J_IMM_19_12_OPOFF) | \
> +	 ((value & (1 << RV_J_IMM_SIGN_OFF)) << RV_J_IMM_SIGN_OPOFF))

Should put () around value

> +
> +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> +			       int patch_offset)
> +{
> +	int num_instr = len / sizeof(u32);
> +	unsigned int call;
> +	int i;
> +	int imm;
> +
> +	for (i = 0; i < num_instr; i++) {
> +		u32 inst = riscv_instruction_at(alt_ptr, i);
> +
> +		if (!riscv_insn_is_jal(inst))
> +			continue;
> +
> +		/* get and adjust new target address */
> +		imm = RV_EXTRACT_JTYPE_IMM(inst);
> +		imm -= patch_offset;
> +
> +		/* pick the original jal */
> +		call = inst;
> +
> +		/* drop the old IMMs, all jal imm bits sit at 31:12 */
> +		call &= ~GENMASK(31, 12);

It'd be nice if this had a define.

> +
> +		/* add the adapted IMMs */
> +		call |= to_jal_imm(imm);
> +
> +		/* patch the call place again */
> +		patch_text_nosync(alt_ptr + i * sizeof(u32), &call, 4);
> +	}
> +}
> +
>  /*
>   * This is called very early in the boot process (directly after we run
>   * a feature detect on the boot CPU). No need to worry about other CPUs
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index ba62a4ff5ccd..c743f0adc794 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -324,6 +324,9 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
>  			riscv_alternative_fix_auipc_jalr(alt->old_ptr,
>  							 alt->alt_len,
>  							 alt->old_ptr - alt->alt_ptr);
> +			riscv_alternative_fix_jal(alt->old_ptr,
> +						  alt->alt_len,
> +						  alt->old_ptr - alt->alt_ptr);
>  		}
>  	}
>  }
> -- 
> 2.37.2
>

Thanks,
drew
Heiko Stuebner Dec. 5, 2022, 3:31 p.m. UTC | #2
Am Sonntag, 4. Dezember 2022, 18:46:20 CET schrieb Jisheng Zhang:
> Alternatives live in a different section, so offsets used by jal
> instruction will point to wrong locations after the patch got applied.
> 
> Similar to arm64, adjust the location to consider that offset.
> 
> Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> ---
>  arch/riscv/include/asm/alternative.h |  2 ++
>  arch/riscv/kernel/alternative.c      | 38 ++++++++++++++++++++++++++++
>  arch/riscv/kernel/cpufeature.c       |  3 +++
>  3 files changed, 43 insertions(+)
> 
> diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
> index c58ec3cc4bc3..33eae9541684 100644
> --- a/arch/riscv/include/asm/alternative.h
> +++ b/arch/riscv/include/asm/alternative.h
> @@ -29,6 +29,8 @@ void apply_module_alternatives(void *start, size_t length);
>  
>  void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
>  				      int patch_offset);
> +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> +			       int patch_offset);
>  
>  struct alt_entry {
>  	void *old_ptr;		 /* address of original instruciton or data  */
> diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
> index 292cc42dc3be..9d88375624b5 100644
> --- a/arch/riscv/kernel/alternative.c
> +++ b/arch/riscv/kernel/alternative.c
> @@ -125,6 +125,44 @@ void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
>  	}
>  }
>  
> +#define to_jal_imm(value)						\
> +	(((value & (RV_J_IMM_10_1_MASK << RV_J_IMM_10_1_OFF)) << RV_I_IMM_11_0_OPOFF) | \
> +	 ((value & (RV_J_IMM_11_MASK << RV_J_IMM_11_OFF)) << RV_J_IMM_11_OPOFF) | \
> +	 ((value & (RV_J_IMM_19_12_OPOFF << RV_J_IMM_19_12_OFF)) << RV_J_IMM_19_12_OPOFF) | \
> +	 ((value & (1 << RV_J_IMM_SIGN_OFF)) << RV_J_IMM_SIGN_OPOFF))
> +
> +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> +			       int patch_offset)
> +{

I think we might want to unfiy this into a common function like

	riscv_alternative_fix_offsets(...)

so that we only run through the code block once

	for (i = 0; i < num_instr; i++) {
		if (riscv_insn_is_auipc_jalr(inst1, inst2)) {
			riscv_alternative_fix_auipc_jalr(...)
			continue;
		}

		if (riscv_insn_is_jal(inst)) {
			riscv_alternative_fix_jal(...)
			continue;
		}
	}

This would also remove the need from calling multiple functions
after patching alternatives.

Thoughts?


Heiko

> +	int num_instr = len / sizeof(u32);
> +	unsigned int call;
> +	int i;
> +	int imm;
> +
> +	for (i = 0; i < num_instr; i++) {
> +		u32 inst = riscv_instruction_at(alt_ptr, i);
> +
> +		if (!riscv_insn_is_jal(inst))
> +			continue;
> +
> +		/* get and adjust new target address */
> +		imm = RV_EXTRACT_JTYPE_IMM(inst);
> +		imm -= patch_offset;
> +
> +		/* pick the original jal */
> +		call = inst;
> +
> +		/* drop the old IMMs, all jal imm bits sit at 31:12 */
> +		call &= ~GENMASK(31, 12);
> +
> +		/* add the adapted IMMs */
> +		call |= to_jal_imm(imm);
> +
> +		/* patch the call place again */
> +		patch_text_nosync(alt_ptr + i * sizeof(u32), &call, 4);
> +	}
> +}
> +
>  /*
>   * This is called very early in the boot process (directly after we run
>   * a feature detect on the boot CPU). No need to worry about other CPUs
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index ba62a4ff5ccd..c743f0adc794 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -324,6 +324,9 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
>  			riscv_alternative_fix_auipc_jalr(alt->old_ptr,
>  							 alt->alt_len,
>  							 alt->old_ptr - alt->alt_ptr);
> +			riscv_alternative_fix_jal(alt->old_ptr,
> +						  alt->alt_len,
> +						  alt->old_ptr - alt->alt_ptr);
>  		}
>  	}
>  }
>
Jisheng Zhang Dec. 5, 2022, 3:34 p.m. UTC | #3
On Mon, Dec 05, 2022 at 03:57:10PM +0100, Andrew Jones wrote:
> On Mon, Dec 05, 2022 at 01:46:20AM +0800, Jisheng Zhang wrote:
> > Alternatives live in a different section, so offsets used by jal
> > instruction will point to wrong locations after the patch got applied.
> > 
> > Similar to arm64, adjust the location to consider that offset.
> > 
> > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > ---
> >  arch/riscv/include/asm/alternative.h |  2 ++
> >  arch/riscv/kernel/alternative.c      | 38 ++++++++++++++++++++++++++++
> >  arch/riscv/kernel/cpufeature.c       |  3 +++
> >  3 files changed, 43 insertions(+)
> > 
> > diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
> > index c58ec3cc4bc3..33eae9541684 100644
> > --- a/arch/riscv/include/asm/alternative.h
> > +++ b/arch/riscv/include/asm/alternative.h
> > @@ -29,6 +29,8 @@ void apply_module_alternatives(void *start, size_t length);
> >  
> >  void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> >  				      int patch_offset);
> > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > +			       int patch_offset);
> >  
> >  struct alt_entry {
> >  	void *old_ptr;		 /* address of original instruciton or data  */
> > diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
> > index 292cc42dc3be..9d88375624b5 100644
> > --- a/arch/riscv/kernel/alternative.c
> > +++ b/arch/riscv/kernel/alternative.c
> > @@ -125,6 +125,44 @@ void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> >  	}
> >  }
> >  
> > +#define to_jal_imm(value)						\
> > +	(((value & (RV_J_IMM_10_1_MASK << RV_J_IMM_10_1_OFF)) << RV_I_IMM_11_0_OPOFF) | \
>                                                                  ^ RV_J_IMM_10_1_OPOFF

Good catch! I was lucky when testing the whole series ;) will fix it in
newer version.

> 
> > +	 ((value & (RV_J_IMM_11_MASK << RV_J_IMM_11_OFF)) << RV_J_IMM_11_OPOFF) | \
> > +	 ((value & (RV_J_IMM_19_12_OPOFF << RV_J_IMM_19_12_OFF)) << RV_J_IMM_19_12_OPOFF) | \
> > +	 ((value & (1 << RV_J_IMM_SIGN_OFF)) << RV_J_IMM_SIGN_OPOFF))
> 
> Should put () around value

good idea, previously, I just follow to_jalr_imm(), will update it in
newer version.

> 
> > +
> > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > +			       int patch_offset)
> > +{
> > +	int num_instr = len / sizeof(u32);
> > +	unsigned int call;
> > +	int i;
> > +	int imm;
> > +
> > +	for (i = 0; i < num_instr; i++) {
> > +		u32 inst = riscv_instruction_at(alt_ptr, i);
> > +
> > +		if (!riscv_insn_is_jal(inst))
> > +			continue;
> > +
> > +		/* get and adjust new target address */
> > +		imm = RV_EXTRACT_JTYPE_IMM(inst);
> > +		imm -= patch_offset;
> > +
> > +		/* pick the original jal */
> > +		call = inst;
> > +
> > +		/* drop the old IMMs, all jal imm bits sit at 31:12 */
> > +		call &= ~GENMASK(31, 12);
> 
> It'd be nice if this had a define.
> 
> > +
> > +		/* add the adapted IMMs */
> > +		call |= to_jal_imm(imm);
> > +
> > +		/* patch the call place again */
> > +		patch_text_nosync(alt_ptr + i * sizeof(u32), &call, 4);
> > +	}
> > +}
> > +
> >  /*
> >   * This is called very early in the boot process (directly after we run
> >   * a feature detect on the boot CPU). No need to worry about other CPUs
> > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > index ba62a4ff5ccd..c743f0adc794 100644
> > --- a/arch/riscv/kernel/cpufeature.c
> > +++ b/arch/riscv/kernel/cpufeature.c
> > @@ -324,6 +324,9 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
> >  			riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> >  							 alt->alt_len,
> >  							 alt->old_ptr - alt->alt_ptr);
> > +			riscv_alternative_fix_jal(alt->old_ptr,
> > +						  alt->alt_len,
> > +						  alt->old_ptr - alt->alt_ptr);
> >  		}
> >  	}
> >  }
> > -- 
> > 2.37.2
> >
> 
> Thanks,
> drew
Jisheng Zhang Dec. 5, 2022, 3:40 p.m. UTC | #4
On Mon, Dec 05, 2022 at 04:31:08PM +0100, Heiko Stübner wrote:
> Am Sonntag, 4. Dezember 2022, 18:46:20 CET schrieb Jisheng Zhang:
> > Alternatives live in a different section, so offsets used by jal
> > instruction will point to wrong locations after the patch got applied.
> > 
> > Similar to arm64, adjust the location to consider that offset.
> > 
> > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > ---
> >  arch/riscv/include/asm/alternative.h |  2 ++
> >  arch/riscv/kernel/alternative.c      | 38 ++++++++++++++++++++++++++++
> >  arch/riscv/kernel/cpufeature.c       |  3 +++
> >  3 files changed, 43 insertions(+)
> > 
> > diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
> > index c58ec3cc4bc3..33eae9541684 100644
> > --- a/arch/riscv/include/asm/alternative.h
> > +++ b/arch/riscv/include/asm/alternative.h
> > @@ -29,6 +29,8 @@ void apply_module_alternatives(void *start, size_t length);
> >  
> >  void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> >  				      int patch_offset);
> > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > +			       int patch_offset);
> >  
> >  struct alt_entry {
> >  	void *old_ptr;		 /* address of original instruciton or data  */
> > diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
> > index 292cc42dc3be..9d88375624b5 100644
> > --- a/arch/riscv/kernel/alternative.c
> > +++ b/arch/riscv/kernel/alternative.c
> > @@ -125,6 +125,44 @@ void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> >  	}
> >  }
> >  
> > +#define to_jal_imm(value)						\
> > +	(((value & (RV_J_IMM_10_1_MASK << RV_J_IMM_10_1_OFF)) << RV_I_IMM_11_0_OPOFF) | \
> > +	 ((value & (RV_J_IMM_11_MASK << RV_J_IMM_11_OFF)) << RV_J_IMM_11_OPOFF) | \
> > +	 ((value & (RV_J_IMM_19_12_OPOFF << RV_J_IMM_19_12_OFF)) << RV_J_IMM_19_12_OPOFF) | \
> > +	 ((value & (1 << RV_J_IMM_SIGN_OFF)) << RV_J_IMM_SIGN_OPOFF))
> > +
> > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > +			       int patch_offset)
> > +{
> 
> I think we might want to unfiy this into a common function like
> 
> 	riscv_alternative_fix_offsets(...)
> 
> so that we only run through the code block once
> 
> 	for (i = 0; i < num_instr; i++) {
> 		if (riscv_insn_is_auipc_jalr(inst1, inst2)) {
> 			riscv_alternative_fix_auipc_jalr(...)
> 			continue;
> 		}
> 
> 		if (riscv_insn_is_jal(inst)) {
> 			riscv_alternative_fix_jal(...)
> 			continue;
> 		}
> 	}
> 
> This would also remove the need from calling multiple functions
> after patching alternatives.

Yesterday, I also wanted to unify the two instruction fix into
one. But that would need to roll back the
riscv_alternative_fix_auipc_jalr() to your v1 version. And IMHO,
it's better if you can split the Zbb string optimizations series
into two: one for alternative improvements, another for Zbb. Then
we may get the alternative improvements and this inst extension
series merged in v6.2-rc1.

> 
> Thoughts?
> 
> 
> Heiko
> 
> > +	int num_instr = len / sizeof(u32);
> > +	unsigned int call;
> > +	int i;
> > +	int imm;
> > +
> > +	for (i = 0; i < num_instr; i++) {
> > +		u32 inst = riscv_instruction_at(alt_ptr, i);
> > +
> > +		if (!riscv_insn_is_jal(inst))
> > +			continue;
> > +
> > +		/* get and adjust new target address */
> > +		imm = RV_EXTRACT_JTYPE_IMM(inst);
> > +		imm -= patch_offset;
> > +
> > +		/* pick the original jal */
> > +		call = inst;
> > +
> > +		/* drop the old IMMs, all jal imm bits sit at 31:12 */
> > +		call &= ~GENMASK(31, 12);
> > +
> > +		/* add the adapted IMMs */
> > +		call |= to_jal_imm(imm);
> > +
> > +		/* patch the call place again */
> > +		patch_text_nosync(alt_ptr + i * sizeof(u32), &call, 4);
> > +	}
> > +}
> > +
> >  /*
> >   * This is called very early in the boot process (directly after we run
> >   * a feature detect on the boot CPU). No need to worry about other CPUs
> > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > index ba62a4ff5ccd..c743f0adc794 100644
> > --- a/arch/riscv/kernel/cpufeature.c
> > +++ b/arch/riscv/kernel/cpufeature.c
> > @@ -324,6 +324,9 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
> >  			riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> >  							 alt->alt_len,
> >  							 alt->old_ptr - alt->alt_ptr);
> > +			riscv_alternative_fix_jal(alt->old_ptr,
> > +						  alt->alt_len,
> > +						  alt->old_ptr - alt->alt_ptr);
> >  		}
> >  	}
> >  }
> > 
> 
> 
> 
>
Jisheng Zhang Dec. 5, 2022, 4:42 p.m. UTC | #5
On Mon, Dec 05, 2022 at 03:57:10PM +0100, Andrew Jones wrote:
> On Mon, Dec 05, 2022 at 01:46:20AM +0800, Jisheng Zhang wrote:
> > Alternatives live in a different section, so offsets used by jal
> > instruction will point to wrong locations after the patch got applied.
> > 
> > Similar to arm64, adjust the location to consider that offset.
> > 
> > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > ---
> >  arch/riscv/include/asm/alternative.h |  2 ++
> >  arch/riscv/kernel/alternative.c      | 38 ++++++++++++++++++++++++++++
> >  arch/riscv/kernel/cpufeature.c       |  3 +++
> >  3 files changed, 43 insertions(+)
> > 
> > diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
> > index c58ec3cc4bc3..33eae9541684 100644
> > --- a/arch/riscv/include/asm/alternative.h
> > +++ b/arch/riscv/include/asm/alternative.h
> > @@ -29,6 +29,8 @@ void apply_module_alternatives(void *start, size_t length);
> >  
> >  void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> >  				      int patch_offset);
> > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > +			       int patch_offset);
> >  
> >  struct alt_entry {
> >  	void *old_ptr;		 /* address of original instruciton or data  */
> > diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
> > index 292cc42dc3be..9d88375624b5 100644
> > --- a/arch/riscv/kernel/alternative.c
> > +++ b/arch/riscv/kernel/alternative.c
> > @@ -125,6 +125,44 @@ void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> >  	}
> >  }
> >  
> > +#define to_jal_imm(value)						\
> > +	(((value & (RV_J_IMM_10_1_MASK << RV_J_IMM_10_1_OFF)) << RV_I_IMM_11_0_OPOFF) | \
>                                                                  ^ RV_J_IMM_10_1_OPOFF
> 
> > +	 ((value & (RV_J_IMM_11_MASK << RV_J_IMM_11_OFF)) << RV_J_IMM_11_OPOFF) | \
> > +	 ((value & (RV_J_IMM_19_12_OPOFF << RV_J_IMM_19_12_OFF)) << RV_J_IMM_19_12_OPOFF) | \
> > +	 ((value & (1 << RV_J_IMM_SIGN_OFF)) << RV_J_IMM_SIGN_OPOFF))

Hi all,

I believe there's bug in the to_jal_imm() macro implementation, the
correct one should be like this:

#define to_jal_imm(value)                                               \
        ((RV_X(value, RV_J_IMM_10_1_OFF, RV_J_IMM_10_1_MASK) << RV_J_IMM_10_1_OPOFF) | \
        (RV_X(value, RV_J_IMM_11_OFF, RV_J_IMM_11_MASK) << RV_J_IMM_11_OPOFF) | \
        (RV_X(value, RV_J_IMM_19_12_OFF, RV_J_IMM_19_12_MASK) << RV_J_IMM_19_12_OPOFF) | \
        (RV_X(value, RV_J_IMM_SIGN_OFF, 1) << RV_J_IMM_SIGN_OPOFF))

Will fix it in next version.

Thanks
> 
> Should put () around value
> 
> > +
> > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > +			       int patch_offset)
> > +{
> > +	int num_instr = len / sizeof(u32);
> > +	unsigned int call;
> > +	int i;
> > +	int imm;
> > +
> > +	for (i = 0; i < num_instr; i++) {
> > +		u32 inst = riscv_instruction_at(alt_ptr, i);
> > +
> > +		if (!riscv_insn_is_jal(inst))
> > +			continue;
> > +
> > +		/* get and adjust new target address */
> > +		imm = RV_EXTRACT_JTYPE_IMM(inst);
> > +		imm -= patch_offset;
> > +
> > +		/* pick the original jal */
> > +		call = inst;
> > +
> > +		/* drop the old IMMs, all jal imm bits sit at 31:12 */
> > +		call &= ~GENMASK(31, 12);
> 
> It'd be nice if this had a define.
> 
> > +
> > +		/* add the adapted IMMs */
> > +		call |= to_jal_imm(imm);
> > +
> > +		/* patch the call place again */
> > +		patch_text_nosync(alt_ptr + i * sizeof(u32), &call, 4);
> > +	}
> > +}
> > +
> >  /*
> >   * This is called very early in the boot process (directly after we run
> >   * a feature detect on the boot CPU). No need to worry about other CPUs
> > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > index ba62a4ff5ccd..c743f0adc794 100644
> > --- a/arch/riscv/kernel/cpufeature.c
> > +++ b/arch/riscv/kernel/cpufeature.c
> > @@ -324,6 +324,9 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
> >  			riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> >  							 alt->alt_len,
> >  							 alt->old_ptr - alt->alt_ptr);
> > +			riscv_alternative_fix_jal(alt->old_ptr,
> > +						  alt->alt_len,
> > +						  alt->old_ptr - alt->alt_ptr);
> >  		}
> >  	}
> >  }
> > -- 
> > 2.37.2
> >
> 
> Thanks,
> drew
Jisheng Zhang Dec. 5, 2022, 4:49 p.m. UTC | #6
On Tue, Dec 06, 2022 at 12:42:16AM +0800, Jisheng Zhang wrote:
> On Mon, Dec 05, 2022 at 03:57:10PM +0100, Andrew Jones wrote:
> > On Mon, Dec 05, 2022 at 01:46:20AM +0800, Jisheng Zhang wrote:
> > > Alternatives live in a different section, so offsets used by jal
> > > instruction will point to wrong locations after the patch got applied.
> > > 
> > > Similar to arm64, adjust the location to consider that offset.
> > > 
> > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > > ---
> > >  arch/riscv/include/asm/alternative.h |  2 ++
> > >  arch/riscv/kernel/alternative.c      | 38 ++++++++++++++++++++++++++++
> > >  arch/riscv/kernel/cpufeature.c       |  3 +++
> > >  3 files changed, 43 insertions(+)
> > > 
> > > diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
> > > index c58ec3cc4bc3..33eae9541684 100644
> > > --- a/arch/riscv/include/asm/alternative.h
> > > +++ b/arch/riscv/include/asm/alternative.h
> > > @@ -29,6 +29,8 @@ void apply_module_alternatives(void *start, size_t length);
> > >  
> > >  void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> > >  				      int patch_offset);
> > > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > > +			       int patch_offset);
> > >  
> > >  struct alt_entry {
> > >  	void *old_ptr;		 /* address of original instruciton or data  */
> > > diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
> > > index 292cc42dc3be..9d88375624b5 100644
> > > --- a/arch/riscv/kernel/alternative.c
> > > +++ b/arch/riscv/kernel/alternative.c
> > > @@ -125,6 +125,44 @@ void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> > >  	}
> > >  }
> > >  
> > > +#define to_jal_imm(value)						\
> > > +	(((value & (RV_J_IMM_10_1_MASK << RV_J_IMM_10_1_OFF)) << RV_I_IMM_11_0_OPOFF) | \
> >                                                                  ^ RV_J_IMM_10_1_OPOFF
> > 
> > > +	 ((value & (RV_J_IMM_11_MASK << RV_J_IMM_11_OFF)) << RV_J_IMM_11_OPOFF) | \
> > > +	 ((value & (RV_J_IMM_19_12_OPOFF << RV_J_IMM_19_12_OFF)) << RV_J_IMM_19_12_OPOFF) | \
> > > +	 ((value & (1 << RV_J_IMM_SIGN_OFF)) << RV_J_IMM_SIGN_OPOFF))
> 
> Hi all,
> 
> I believe there's bug in the to_jal_imm() macro implementation, the
> correct one should be like this:
> 
> #define to_jal_imm(value)                                               \
>         ((RV_X(value, RV_J_IMM_10_1_OFF, RV_J_IMM_10_1_MASK) << RV_J_IMM_10_1_OPOFF) | \
>         (RV_X(value, RV_J_IMM_11_OFF, RV_J_IMM_11_MASK) << RV_J_IMM_11_OPOFF) | \
>         (RV_X(value, RV_J_IMM_19_12_OFF, RV_J_IMM_19_12_MASK) << RV_J_IMM_19_12_OPOFF) | \
>         (RV_X(value, RV_J_IMM_SIGN_OFF, 1) << RV_J_IMM_SIGN_OPOFF))

And I just tested to_jal_imm() vs RV_EXTRACT_JTYPE_IMM(), they match perfectly.
E.g:
RV_EXTRACT_JTYPE_IMM(to_jal_imm(imm)) == imm is alway true when imm is a jal
valid offset.

> 
> Will fix it in next version.
> 
> Thanks
> > 
> > Should put () around value
> > 
> > > +
> > > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > > +			       int patch_offset)
> > > +{
> > > +	int num_instr = len / sizeof(u32);
> > > +	unsigned int call;
> > > +	int i;
> > > +	int imm;
> > > +
> > > +	for (i = 0; i < num_instr; i++) {
> > > +		u32 inst = riscv_instruction_at(alt_ptr, i);
> > > +
> > > +		if (!riscv_insn_is_jal(inst))
> > > +			continue;
> > > +
> > > +		/* get and adjust new target address */
> > > +		imm = RV_EXTRACT_JTYPE_IMM(inst);
> > > +		imm -= patch_offset;
> > > +
> > > +		/* pick the original jal */
> > > +		call = inst;
> > > +
> > > +		/* drop the old IMMs, all jal imm bits sit at 31:12 */
> > > +		call &= ~GENMASK(31, 12);
> > 
> > It'd be nice if this had a define.
> > 
> > > +
> > > +		/* add the adapted IMMs */
> > > +		call |= to_jal_imm(imm);
> > > +
> > > +		/* patch the call place again */
> > > +		patch_text_nosync(alt_ptr + i * sizeof(u32), &call, 4);
> > > +	}
> > > +}
> > > +
> > >  /*
> > >   * This is called very early in the boot process (directly after we run
> > >   * a feature detect on the boot CPU). No need to worry about other CPUs
> > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > > index ba62a4ff5ccd..c743f0adc794 100644
> > > --- a/arch/riscv/kernel/cpufeature.c
> > > +++ b/arch/riscv/kernel/cpufeature.c
> > > @@ -324,6 +324,9 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
> > >  			riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> > >  							 alt->alt_len,
> > >  							 alt->old_ptr - alt->alt_ptr);
> > > +			riscv_alternative_fix_jal(alt->old_ptr,
> > > +						  alt->alt_len,
> > > +						  alt->old_ptr - alt->alt_ptr);
> > >  		}
> > >  	}
> > >  }
> > > -- 
> > > 2.37.2
> > >
> > 
> > Thanks,
> > drew
Conor Dooley Dec. 5, 2022, 6:36 p.m. UTC | #7
Heiko, Jisheng,

On Mon, Dec 05, 2022 at 11:40:44PM +0800, Jisheng Zhang wrote:
> On Mon, Dec 05, 2022 at 04:31:08PM +0100, Heiko Stübner wrote:
> > Am Sonntag, 4. Dezember 2022, 18:46:20 CET schrieb Jisheng Zhang:
> > > Alternatives live in a different section, so offsets used by jal
> > > instruction will point to wrong locations after the patch got applied.
> > > 
> > > Similar to arm64, adjust the location to consider that offset.
> > > 
> > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > > ---
> > >  arch/riscv/include/asm/alternative.h |  2 ++
> > >  arch/riscv/kernel/alternative.c      | 38 ++++++++++++++++++++++++++++
> > >  arch/riscv/kernel/cpufeature.c       |  3 +++
> > >  3 files changed, 43 insertions(+)
> > > 
> > > diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
> > > index c58ec3cc4bc3..33eae9541684 100644
> > > --- a/arch/riscv/include/asm/alternative.h
> > > +++ b/arch/riscv/include/asm/alternative.h
> > > @@ -29,6 +29,8 @@ void apply_module_alternatives(void *start, size_t length);
> > >  
> > >  void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> > >  				      int patch_offset);
> > > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > > +			       int patch_offset);
> > >  
> > >  struct alt_entry {
> > >  	void *old_ptr;		 /* address of original instruciton or data  */
> > > diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
> > > index 292cc42dc3be..9d88375624b5 100644
> > > --- a/arch/riscv/kernel/alternative.c
> > > +++ b/arch/riscv/kernel/alternative.c
> > > @@ -125,6 +125,44 @@ void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> > >  	}
> > >  }
> > >  
> > > +#define to_jal_imm(value)						\
> > > +	(((value & (RV_J_IMM_10_1_MASK << RV_J_IMM_10_1_OFF)) << RV_I_IMM_11_0_OPOFF) | \
> > > +	 ((value & (RV_J_IMM_11_MASK << RV_J_IMM_11_OFF)) << RV_J_IMM_11_OPOFF) | \
> > > +	 ((value & (RV_J_IMM_19_12_OPOFF << RV_J_IMM_19_12_OFF)) << RV_J_IMM_19_12_OPOFF) | \
> > > +	 ((value & (1 << RV_J_IMM_SIGN_OFF)) << RV_J_IMM_SIGN_OPOFF))
> > > +
> > > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > > +			       int patch_offset)
> > > +{
> > 
> > I think we might want to unfiy this into a common function like
> > 
> > 	riscv_alternative_fix_offsets(...)
> > 
> > so that we only run through the code block once
> > 
> > 	for (i = 0; i < num_instr; i++) {
> > 		if (riscv_insn_is_auipc_jalr(inst1, inst2)) {
> > 			riscv_alternative_fix_auipc_jalr(...)
> > 			continue;
> > 		}
> > 
> > 		if (riscv_insn_is_jal(inst)) {
> > 			riscv_alternative_fix_jal(...)
> > 			continue;
> > 		}
> > 	}
> > 
> > This would also remove the need from calling multiple functions
> > after patching alternatives.
> 
> Yesterday, I also wanted to unify the two instruction fix into
> one. But that would need to roll back the
> riscv_alternative_fix_auipc_jalr() to your v1 version. And IMHO,
> it's better if you can split the Zbb string optimizations series
> into two: one for alternative improvements, another for Zbb. Then
> we may get the alternative improvements and this inst extension
> series merged in v6.2-rc1.

Heiko, perhaps you can correct me here:

Last Wednesday you & Palmer agreed that it was too late in the cycle to
apply any of the stuff touching alternatives?
If I do recall correctly, gives plenty of time to sort out any
interdependent changes here.

Could easily be misremembering, wouldn't be the first time!

Thanks,
Conor.

> > > +	int num_instr = len / sizeof(u32);
> > > +	unsigned int call;
> > > +	int i;
> > > +	int imm;
> > > +
> > > +	for (i = 0; i < num_instr; i++) {
> > > +		u32 inst = riscv_instruction_at(alt_ptr, i);
> > > +
> > > +		if (!riscv_insn_is_jal(inst))
> > > +			continue;
> > > +
> > > +		/* get and adjust new target address */
> > > +		imm = RV_EXTRACT_JTYPE_IMM(inst);
> > > +		imm -= patch_offset;
> > > +
> > > +		/* pick the original jal */
> > > +		call = inst;
> > > +
> > > +		/* drop the old IMMs, all jal imm bits sit at 31:12 */
> > > +		call &= ~GENMASK(31, 12);
> > > +
> > > +		/* add the adapted IMMs */
> > > +		call |= to_jal_imm(imm);
> > > +
> > > +		/* patch the call place again */
> > > +		patch_text_nosync(alt_ptr + i * sizeof(u32), &call, 4);
> > > +	}
> > > +}
> > > +
> > >  /*
> > >   * This is called very early in the boot process (directly after we run
> > >   * a feature detect on the boot CPU). No need to worry about other CPUs
> > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > > index ba62a4ff5ccd..c743f0adc794 100644
> > > --- a/arch/riscv/kernel/cpufeature.c
> > > +++ b/arch/riscv/kernel/cpufeature.c
> > > @@ -324,6 +324,9 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
> > >  			riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> > >  							 alt->alt_len,
> > >  							 alt->old_ptr - alt->alt_ptr);
> > > +			riscv_alternative_fix_jal(alt->old_ptr,
> > > +						  alt->alt_len,
> > > +						  alt->old_ptr - alt->alt_ptr);
> > >  		}
> > >  	}
> > >  }
> > > 
> > 
> > 
> > 
> >
Heiko Stuebner Dec. 5, 2022, 6:49 p.m. UTC | #8
Am Montag, 5. Dezember 2022, 19:36:45 CET schrieb Conor Dooley:
> Heiko, Jisheng,
> 
> On Mon, Dec 05, 2022 at 11:40:44PM +0800, Jisheng Zhang wrote:
> > On Mon, Dec 05, 2022 at 04:31:08PM +0100, Heiko Stübner wrote:
> > > Am Sonntag, 4. Dezember 2022, 18:46:20 CET schrieb Jisheng Zhang:
> > > > Alternatives live in a different section, so offsets used by jal
> > > > instruction will point to wrong locations after the patch got applied.
> > > > 
> > > > Similar to arm64, adjust the location to consider that offset.
> > > > 
> > > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > > > ---
> > > >  arch/riscv/include/asm/alternative.h |  2 ++
> > > >  arch/riscv/kernel/alternative.c      | 38 ++++++++++++++++++++++++++++
> > > >  arch/riscv/kernel/cpufeature.c       |  3 +++
> > > >  3 files changed, 43 insertions(+)
> > > > 
> > > > diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
> > > > index c58ec3cc4bc3..33eae9541684 100644
> > > > --- a/arch/riscv/include/asm/alternative.h
> > > > +++ b/arch/riscv/include/asm/alternative.h
> > > > @@ -29,6 +29,8 @@ void apply_module_alternatives(void *start, size_t length);
> > > >  
> > > >  void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> > > >  				      int patch_offset);
> > > > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > > > +			       int patch_offset);
> > > >  
> > > >  struct alt_entry {
> > > >  	void *old_ptr;		 /* address of original instruciton or data  */
> > > > diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
> > > > index 292cc42dc3be..9d88375624b5 100644
> > > > --- a/arch/riscv/kernel/alternative.c
> > > > +++ b/arch/riscv/kernel/alternative.c
> > > > @@ -125,6 +125,44 @@ void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> > > >  	}
> > > >  }
> > > >  
> > > > +#define to_jal_imm(value)						\
> > > > +	(((value & (RV_J_IMM_10_1_MASK << RV_J_IMM_10_1_OFF)) << RV_I_IMM_11_0_OPOFF) | \
> > > > +	 ((value & (RV_J_IMM_11_MASK << RV_J_IMM_11_OFF)) << RV_J_IMM_11_OPOFF) | \
> > > > +	 ((value & (RV_J_IMM_19_12_OPOFF << RV_J_IMM_19_12_OFF)) << RV_J_IMM_19_12_OPOFF) | \
> > > > +	 ((value & (1 << RV_J_IMM_SIGN_OFF)) << RV_J_IMM_SIGN_OPOFF))
> > > > +
> > > > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > > > +			       int patch_offset)
> > > > +{
> > > 
> > > I think we might want to unfiy this into a common function like
> > > 
> > > 	riscv_alternative_fix_offsets(...)
> > > 
> > > so that we only run through the code block once
> > > 
> > > 	for (i = 0; i < num_instr; i++) {
> > > 		if (riscv_insn_is_auipc_jalr(inst1, inst2)) {
> > > 			riscv_alternative_fix_auipc_jalr(...)
> > > 			continue;
> > > 		}
> > > 
> > > 		if (riscv_insn_is_jal(inst)) {
> > > 			riscv_alternative_fix_jal(...)
> > > 			continue;
> > > 		}
> > > 	}
> > > 
> > > This would also remove the need from calling multiple functions
> > > after patching alternatives.
> > 
> > Yesterday, I also wanted to unify the two instruction fix into
> > one. But that would need to roll back the
> > riscv_alternative_fix_auipc_jalr() to your v1 version. And IMHO,
> > it's better if you can split the Zbb string optimizations series
> > into two: one for alternative improvements, another for Zbb. Then
> > we may get the alternative improvements and this inst extension
> > series merged in v6.2-rc1.
> 
> Heiko, perhaps you can correct me here:
> 
> Last Wednesday you & Palmer agreed that it was too late in the cycle to
> apply any of the stuff touching alternatives?
> If I do recall correctly, gives plenty of time to sort out any
> interdependent changes here.
> 
> Could easily be misremembering, wouldn't be the first time!

You slightly misremembered, but are still correct with the above ;-) .

I.e. what we talked about was stuff for fixes for 6.1-rc, were Palmers
wisely wanted to limit additions to really easy fixes for the remaining
last rc, to not upset any existing boards.

But you are still correct that we also shouldn't target the 6.2 merge window
anymore :-) .

We're after -rc8 now (which is in itself uncommon) and in his -rc7
announcement [0], Linus stated

"[...] the usual rule is that things that I get sent for the
merge window should have been all ready _before_ the merge window
opened. But with the merge window happening largely during the holiday
season, I'll just be enforcing that pretty strictly."

That means new stuff should be reviewed and in linux-next _way before_ the
merge window opens next weekend. Taking into account that people need
to review stuff (and maybe the series needing another round), I really don't
see this happening this week and everything else will get us shouted at
from atop a christmas tree ;-) .

That's the reason most maintainer-trees stop accepting stuff after -rc7


Heiko


[0] https://lkml.org/lkml/2022/11/27/278
Conor Dooley Dec. 5, 2022, 7:49 p.m. UTC | #9
On Mon, Dec 05, 2022 at 07:49:01PM +0100, Heiko Stübner wrote:
> Am Montag, 5. Dezember 2022, 19:36:45 CET schrieb Conor Dooley:
> > Heiko, Jisheng,
> > On Mon, Dec 05, 2022 at 11:40:44PM +0800, Jisheng Zhang wrote:
> > > Yesterday, I also wanted to unify the two instruction fix into
> > > one. But that would need to roll back the
> > > riscv_alternative_fix_auipc_jalr() to your v1 version. And IMHO,
> > > it's better if you can split the Zbb string optimizations series
> > > into two: one for alternative improvements, another for Zbb. Then
> > > we may get the alternative improvements and this inst extension
> > > series merged in v6.2-rc1.
> > 
> > Heiko, perhaps you can correct me here:
> > 
> > Last Wednesday you & Palmer agreed that it was too late in the cycle to
> > apply any of the stuff touching alternatives?
> > If I do recall correctly, gives plenty of time to sort out any
> > interdependent changes here.
> > 
> > Could easily be misremembering, wouldn't be the first time!
> 
> You slightly misremembered, but are still correct with the above ;-) .
> 
> I.e. what we talked about was stuff for fixes for 6.1-rc, were Palmers
> wisely wanted to limit additions to really easy fixes for the remaining
> last rc, to not upset any existing boards.

Ahh right. I was 50-50 on whether something like that was said so at
least I am not going crazy.

> But you are still correct that we also shouldn't target the 6.2 merge window
> anymore :-) .
> 
> We're after -rc8 now (which is in itself uncommon) and in his -rc7
> announcement [0], Linus stated
> 
> "[...] the usual rule is that things that I get sent for the
> merge window should have been all ready _before_ the merge window
> opened. But with the merge window happening largely during the holiday
> season, I'll just be enforcing that pretty strictly."

Yah, of all the windows to land patchsets that are being re-spun a few
days before it opens this probably isn't the best one to pick!

> That means new stuff should be reviewed and in linux-next _way before_ the
> merge window opens next weekend. Taking into account that people need
> to review stuff (and maybe the series needing another round), I really don't
> see this happening this week and everything else will get us shouted at
> from atop a christmas tree ;-) .
> 
> That's the reason most maintainer-trees stop accepting stuff after -rc7

Aye, in RISC-V land maybe we will get there one day :)

For the original question though, breaking them up into 3 or 4 smaller
bits that could get applied on their own is probably a good idea?

Between yourselves, Drew and Prabhakar there's a couple series touching
the same bits. Certainly don't want to seem like I am speaking for the
Higher Powers here, but some sort of logical ordering would probably be
a good idea so as not to hold each other up?
The non-string bit of your series has been fairly well reviewed & would,
in theory, be mergeable once the tree re-opens? Timing aside, Jisheng's
idea seems like a good one, no?
Heiko Stuebner Dec. 6, 2022, 12:39 a.m. UTC | #10
Am Montag, 5. Dezember 2022, 20:49:26 CET schrieb Conor Dooley:
> On Mon, Dec 05, 2022 at 07:49:01PM +0100, Heiko Stübner wrote:
> > Am Montag, 5. Dezember 2022, 19:36:45 CET schrieb Conor Dooley:
> > > Heiko, Jisheng,
> > > On Mon, Dec 05, 2022 at 11:40:44PM +0800, Jisheng Zhang wrote:
> > > > Yesterday, I also wanted to unify the two instruction fix into
> > > > one. But that would need to roll back the
> > > > riscv_alternative_fix_auipc_jalr() to your v1 version. And IMHO,
> > > > it's better if you can split the Zbb string optimizations series
> > > > into two: one for alternative improvements, another for Zbb. Then
> > > > we may get the alternative improvements and this inst extension
> > > > series merged in v6.2-rc1.
> > > 
> > > Heiko, perhaps you can correct me here:
> > > 
> > > Last Wednesday you & Palmer agreed that it was too late in the cycle to
> > > apply any of the stuff touching alternatives?
> > > If I do recall correctly, gives plenty of time to sort out any
> > > interdependent changes here.
> > > 
> > > Could easily be misremembering, wouldn't be the first time!
> > 
> > You slightly misremembered, but are still correct with the above ;-) .
> > 
> > I.e. what we talked about was stuff for fixes for 6.1-rc, were Palmers
> > wisely wanted to limit additions to really easy fixes for the remaining
> > last rc, to not upset any existing boards.
> 
> Ahh right. I was 50-50 on whether something like that was said so at
> least I am not going crazy.
> 
> > But you are still correct that we also shouldn't target the 6.2 merge window
> > anymore :-) .
> > 
> > We're after -rc8 now (which is in itself uncommon) and in his -rc7
> > announcement [0], Linus stated
> > 
> > "[...] the usual rule is that things that I get sent for the
> > merge window should have been all ready _before_ the merge window
> > opened. But with the merge window happening largely during the holiday
> > season, I'll just be enforcing that pretty strictly."
> 
> Yah, of all the windows to land patchsets that are being re-spun a few
> days before it opens this probably isn't the best one to pick!
> 
> > That means new stuff should be reviewed and in linux-next _way before_ the
> > merge window opens next weekend. Taking into account that people need
> > to review stuff (and maybe the series needing another round), I really don't
> > see this happening this week and everything else will get us shouted at
> > from atop a christmas tree ;-) .
> > 
> > That's the reason most maintainer-trees stop accepting stuff after -rc7
> 
> Aye, in RISC-V land maybe we will get there one day :)
> 
> For the original question though, breaking them up into 3 or 4 smaller
> bits that could get applied on their own is probably a good idea?
> 
> Between yourselves, Drew and Prabhakar there's a couple series touching
> the same bits. Certainly don't want to seem like I am speaking for the
> Higher Powers here, but some sort of logical ordering would probably be
> a good idea so as not to hold each other up?
> The non-string bit of your series has been fairly well reviewed & would,
> in theory, be mergeable once the tree re-opens? Timing aside, Jisheng's
> idea seems like a good one, no?

yeah, I had that same thought over the weekend - with the generic
part being pretty good in the review and only the string part needing
more work and thus ideally splitting the series [0] .

Jisheng's series just made that even more important to do :-)


Heiko
Andrew Jones Dec. 6, 2022, 5:50 a.m. UTC | #11
On Tue, Dec 06, 2022 at 12:49:51AM +0800, Jisheng Zhang wrote:
> On Tue, Dec 06, 2022 at 12:42:16AM +0800, Jisheng Zhang wrote:
> > On Mon, Dec 05, 2022 at 03:57:10PM +0100, Andrew Jones wrote:
> > > On Mon, Dec 05, 2022 at 01:46:20AM +0800, Jisheng Zhang wrote:
> > > > Alternatives live in a different section, so offsets used by jal
> > > > instruction will point to wrong locations after the patch got applied.
> > > > 
> > > > Similar to arm64, adjust the location to consider that offset.
> > > > 
> > > > Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
> > > > ---
> > > >  arch/riscv/include/asm/alternative.h |  2 ++
> > > >  arch/riscv/kernel/alternative.c      | 38 ++++++++++++++++++++++++++++
> > > >  arch/riscv/kernel/cpufeature.c       |  3 +++
> > > >  3 files changed, 43 insertions(+)
> > > > 
> > > > diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
> > > > index c58ec3cc4bc3..33eae9541684 100644
> > > > --- a/arch/riscv/include/asm/alternative.h
> > > > +++ b/arch/riscv/include/asm/alternative.h
> > > > @@ -29,6 +29,8 @@ void apply_module_alternatives(void *start, size_t length);
> > > >  
> > > >  void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> > > >  				      int patch_offset);
> > > > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > > > +			       int patch_offset);
> > > >  
> > > >  struct alt_entry {
> > > >  	void *old_ptr;		 /* address of original instruciton or data  */
> > > > diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
> > > > index 292cc42dc3be..9d88375624b5 100644
> > > > --- a/arch/riscv/kernel/alternative.c
> > > > +++ b/arch/riscv/kernel/alternative.c
> > > > @@ -125,6 +125,44 @@ void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
> > > >  	}
> > > >  }
> > > >  
> > > > +#define to_jal_imm(value)						\
> > > > +	(((value & (RV_J_IMM_10_1_MASK << RV_J_IMM_10_1_OFF)) << RV_I_IMM_11_0_OPOFF) | \
> > >                                                                  ^ RV_J_IMM_10_1_OPOFF
> > > 
> > > > +	 ((value & (RV_J_IMM_11_MASK << RV_J_IMM_11_OFF)) << RV_J_IMM_11_OPOFF) | \
> > > > +	 ((value & (RV_J_IMM_19_12_OPOFF << RV_J_IMM_19_12_OFF)) << RV_J_IMM_19_12_OPOFF) | \
> > > > +	 ((value & (1 << RV_J_IMM_SIGN_OFF)) << RV_J_IMM_SIGN_OPOFF))
> > 
> > Hi all,
> > 
> > I believe there's bug in the to_jal_imm() macro implementation, the
> > correct one should be like this:
> > 
> > #define to_jal_imm(value)                                               \
> >         ((RV_X(value, RV_J_IMM_10_1_OFF, RV_J_IMM_10_1_MASK) << RV_J_IMM_10_1_OPOFF) | \
> >         (RV_X(value, RV_J_IMM_11_OFF, RV_J_IMM_11_MASK) << RV_J_IMM_11_OPOFF) | \
> >         (RV_X(value, RV_J_IMM_19_12_OFF, RV_J_IMM_19_12_MASK) << RV_J_IMM_19_12_OPOFF) | \
> >         (RV_X(value, RV_J_IMM_SIGN_OFF, 1) << RV_J_IMM_SIGN_OPOFF))
> 
> And I just tested to_jal_imm() vs RV_EXTRACT_JTYPE_IMM(), they match perfectly.
> E.g:
> RV_EXTRACT_JTYPE_IMM(to_jal_imm(imm)) == imm is alway true when imm is a jal
> valid offset.

I think we should define deposit() functions for each type, as well as the
extract() functions, (and I'd prefer we use static inlines to macros to
get some type checking). See [1] for my proposal.

[1] https://lore.kernel.org/all/20221205145323.l2dro6dt7muumqpn@kamzik/

Thanks,
drew

> 
> > 
> > Will fix it in next version.
> > 
> > Thanks
> > > 
> > > Should put () around value
> > > 
> > > > +
> > > > +void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
> > > > +			       int patch_offset)
> > > > +{
> > > > +	int num_instr = len / sizeof(u32);
> > > > +	unsigned int call;
> > > > +	int i;
> > > > +	int imm;
> > > > +
> > > > +	for (i = 0; i < num_instr; i++) {
> > > > +		u32 inst = riscv_instruction_at(alt_ptr, i);
> > > > +
> > > > +		if (!riscv_insn_is_jal(inst))
> > > > +			continue;
> > > > +
> > > > +		/* get and adjust new target address */
> > > > +		imm = RV_EXTRACT_JTYPE_IMM(inst);
> > > > +		imm -= patch_offset;
> > > > +
> > > > +		/* pick the original jal */
> > > > +		call = inst;
> > > > +
> > > > +		/* drop the old IMMs, all jal imm bits sit at 31:12 */
> > > > +		call &= ~GENMASK(31, 12);
> > > 
> > > It'd be nice if this had a define.
> > > 
> > > > +
> > > > +		/* add the adapted IMMs */
> > > > +		call |= to_jal_imm(imm);
> > > > +
> > > > +		/* patch the call place again */
> > > > +		patch_text_nosync(alt_ptr + i * sizeof(u32), &call, 4);
> > > > +	}
> > > > +}
> > > > +
> > > >  /*
> > > >   * This is called very early in the boot process (directly after we run
> > > >   * a feature detect on the boot CPU). No need to worry about other CPUs
> > > > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> > > > index ba62a4ff5ccd..c743f0adc794 100644
> > > > --- a/arch/riscv/kernel/cpufeature.c
> > > > +++ b/arch/riscv/kernel/cpufeature.c
> > > > @@ -324,6 +324,9 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
> > > >  			riscv_alternative_fix_auipc_jalr(alt->old_ptr,
> > > >  							 alt->alt_len,
> > > >  							 alt->old_ptr - alt->alt_ptr);
> > > > +			riscv_alternative_fix_jal(alt->old_ptr,
> > > > +						  alt->alt_len,
> > > > +						  alt->old_ptr - alt->alt_ptr);
> > > >  		}
> > > >  	}
> > > >  }
> > > > -- 
> > > > 2.37.2
> > > >
> > > 
> > > Thanks,
> > > drew
Jisheng Zhang Dec. 6, 2022, 3:02 p.m. UTC | #12
On Tue, Dec 06, 2022 at 01:39:50AM +0100, Heiko Stübner wrote:
> Am Montag, 5. Dezember 2022, 20:49:26 CET schrieb Conor Dooley:
> > On Mon, Dec 05, 2022 at 07:49:01PM +0100, Heiko Stübner wrote:
> > > Am Montag, 5. Dezember 2022, 19:36:45 CET schrieb Conor Dooley:
> > > > Heiko, Jisheng,
> > > > On Mon, Dec 05, 2022 at 11:40:44PM +0800, Jisheng Zhang wrote:
> > > > > Yesterday, I also wanted to unify the two instruction fix into
> > > > > one. But that would need to roll back the
> > > > > riscv_alternative_fix_auipc_jalr() to your v1 version. And IMHO,
> > > > > it's better if you can split the Zbb string optimizations series
> > > > > into two: one for alternative improvements, another for Zbb. Then
> > > > > we may get the alternative improvements and this inst extension
> > > > > series merged in v6.2-rc1.
> > > > 
> > > > Heiko, perhaps you can correct me here:
> > > > 
> > > > Last Wednesday you & Palmer agreed that it was too late in the cycle to
> > > > apply any of the stuff touching alternatives?
> > > > If I do recall correctly, gives plenty of time to sort out any
> > > > interdependent changes here.
> > > > 
> > > > Could easily be misremembering, wouldn't be the first time!
> > > 
> > > You slightly misremembered, but are still correct with the above ;-) .
> > > 
> > > I.e. what we talked about was stuff for fixes for 6.1-rc, were Palmers
> > > wisely wanted to limit additions to really easy fixes for the remaining
> > > last rc, to not upset any existing boards.
> > 
> > Ahh right. I was 50-50 on whether something like that was said so at
> > least I am not going crazy.
> > 
> > > But you are still correct that we also shouldn't target the 6.2 merge window
> > > anymore :-) .
> > > 
> > > We're after -rc8 now (which is in itself uncommon) and in his -rc7
> > > announcement [0], Linus stated
> > > 
> > > "[...] the usual rule is that things that I get sent for the
> > > merge window should have been all ready _before_ the merge window
> > > opened. But with the merge window happening largely during the holiday
> > > season, I'll just be enforcing that pretty strictly."
> > 
> > Yah, of all the windows to land patchsets that are being re-spun a few
> > days before it opens this probably isn't the best one to pick!
> > 
> > > That means new stuff should be reviewed and in linux-next _way before_ the
> > > merge window opens next weekend. Taking into account that people need
> > > to review stuff (and maybe the series needing another round), I really don't
> > > see this happening this week and everything else will get us shouted at
> > > from atop a christmas tree ;-) .
> > > 
> > > That's the reason most maintainer-trees stop accepting stuff after -rc7

Thanks for the information, then we have more time to test and review
this series.

> > 
> > Aye, in RISC-V land maybe we will get there one day :)
> > 
> > For the original question though, breaking them up into 3 or 4 smaller
> > bits that could get applied on their own is probably a good idea?
> > 
> > Between yourselves, Drew and Prabhakar there's a couple series touching
> > the same bits. Certainly don't want to seem like I am speaking for the

Because alternative is the best solution to riscv extensions while still
keep one unified kernel Image ;)

> > Higher Powers here, but some sort of logical ordering would probably be
> > a good idea so as not to hold each other up?
> > The non-string bit of your series has been fairly well reviewed & would,
> > in theory, be mergeable once the tree re-opens? Timing aside, Jisheng's
> > idea seems like a good one, no?

IMHO, it will be better if Palmer can merge Heiko's alternative improvements
into riscv-next once well reviewed and the window is reopen. Then Drew,
Prabhakar and I can rebase on that tree.

> 
> yeah, I had that same thought over the weekend - with the generic
> part being pretty good in the review and only the string part needing
> more work and thus ideally splitting the series [0] .
> 
> Jisheng's series just made that even more important to do :-)
> 
> 
> Heiko
> 
>
Conor Dooley Dec. 6, 2022, 4:12 p.m. UTC | #13
On Tue, Dec 06, 2022 at 11:02:35PM +0800, Jisheng Zhang wrote:

> > > Higher Powers here, but some sort of logical ordering would probably be
> > > a good idea so as not to hold each other up?
> > > The non-string bit of your series has been fairly well reviewed & would,
> > > in theory, be mergeable once the tree re-opens? Timing aside, Jisheng's
> > > idea seems like a good one, no?
> 
> IMHO, it will be better if Palmer can merge Heiko's alternative improvements
> into riscv-next once well reviewed and the window is reopen. Then Drew,
> Prabhakar and I can rebase on that tree.

Unless I missed something, we're saying the same thing in different ways
:)
Conor Dooley Dec. 19, 2022, 9:32 p.m. UTC | #14
On Tue, Dec 06, 2022 at 04:12:35PM +0000, Conor Dooley wrote:
> On Tue, Dec 06, 2022 at 11:02:35PM +0800, Jisheng Zhang wrote:
> 
> > > > Higher Powers here, but some sort of logical ordering would probably be
> > > > a good idea so as not to hold each other up?
> > > > The non-string bit of your series has been fairly well reviewed & would,
> > > > in theory, be mergeable once the tree re-opens? Timing aside, Jisheng's
> > > > idea seems like a good one, no?
> > 
> > IMHO, it will be better if Palmer can merge Heiko's alternative improvements
> > into riscv-next once well reviewed and the window is reopen. Then Drew,
> > Prabhakar and I can rebase on that tree.
> 
> Unless I missed something, we're saying the same thing in different ways
> :)

Hey Jisheng,
FYI I'm gonna mark this version of the patchset as "Changes Requested"
in patchwork. Palmer's said he'll apply Heiko's patchset that this
depends on once rc1 is out so I am expecting that you'll rebase on top
of that with the various comments fixed.
Thanks,
Conor.
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
index c58ec3cc4bc3..33eae9541684 100644
--- a/arch/riscv/include/asm/alternative.h
+++ b/arch/riscv/include/asm/alternative.h
@@ -29,6 +29,8 @@  void apply_module_alternatives(void *start, size_t length);
 
 void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
 				      int patch_offset);
+void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
+			       int patch_offset);
 
 struct alt_entry {
 	void *old_ptr;		 /* address of original instruciton or data  */
diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
index 292cc42dc3be..9d88375624b5 100644
--- a/arch/riscv/kernel/alternative.c
+++ b/arch/riscv/kernel/alternative.c
@@ -125,6 +125,44 @@  void riscv_alternative_fix_auipc_jalr(void *alt_ptr, unsigned int len,
 	}
 }
 
+#define to_jal_imm(value)						\
+	(((value & (RV_J_IMM_10_1_MASK << RV_J_IMM_10_1_OFF)) << RV_I_IMM_11_0_OPOFF) | \
+	 ((value & (RV_J_IMM_11_MASK << RV_J_IMM_11_OFF)) << RV_J_IMM_11_OPOFF) | \
+	 ((value & (RV_J_IMM_19_12_OPOFF << RV_J_IMM_19_12_OFF)) << RV_J_IMM_19_12_OPOFF) | \
+	 ((value & (1 << RV_J_IMM_SIGN_OFF)) << RV_J_IMM_SIGN_OPOFF))
+
+void riscv_alternative_fix_jal(void *alt_ptr, unsigned int len,
+			       int patch_offset)
+{
+	int num_instr = len / sizeof(u32);
+	unsigned int call;
+	int i;
+	int imm;
+
+	for (i = 0; i < num_instr; i++) {
+		u32 inst = riscv_instruction_at(alt_ptr, i);
+
+		if (!riscv_insn_is_jal(inst))
+			continue;
+
+		/* get and adjust new target address */
+		imm = RV_EXTRACT_JTYPE_IMM(inst);
+		imm -= patch_offset;
+
+		/* pick the original jal */
+		call = inst;
+
+		/* drop the old IMMs, all jal imm bits sit at 31:12 */
+		call &= ~GENMASK(31, 12);
+
+		/* add the adapted IMMs */
+		call |= to_jal_imm(imm);
+
+		/* patch the call place again */
+		patch_text_nosync(alt_ptr + i * sizeof(u32), &call, 4);
+	}
+}
+
 /*
  * This is called very early in the boot process (directly after we run
  * a feature detect on the boot CPU). No need to worry about other CPUs
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index ba62a4ff5ccd..c743f0adc794 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -324,6 +324,9 @@  void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
 			riscv_alternative_fix_auipc_jalr(alt->old_ptr,
 							 alt->alt_len,
 							 alt->old_ptr - alt->alt_ptr);
+			riscv_alternative_fix_jal(alt->old_ptr,
+						  alt->alt_len,
+						  alt->old_ptr - alt->alt_ptr);
 		}
 	}
 }