diff mbox series

[v2,3/3] RISC-V: add T-Head vector errata handling

Message ID 20230622231305.631331-4-heiko@sntech.de (mailing list archive)
State Changes Requested
Headers show
Series RISC-V: T-Head vector handling | expand

Checks

Context Check Description
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be for-next at HEAD 4681dacadeef
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/maintainers_pattern success MAINTAINERS pattern errors before the patch: 6 and now 6
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/build_rv64_clang_allmodconfig fail Failed to build the tree with this patch.
conchuod/module_param success Was 0 now: 0
conchuod/build_rv64_gcc_allmodconfig success Errors and warnings before: 16720 this patch: 16720
conchuod/build_rv32_defconfig fail Build failed
conchuod/dtb_warn_rv64 success Errors and warnings before: 20 this patch: 20
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch warning CHECK: Lines should not end with a '('
conchuod/build_rv64_nommu_k210_defconfig fail Build failed
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig fail Build failed

Commit Message

Heiko Stübner June 22, 2023, 11:13 p.m. UTC
From: Heiko Stuebner <heiko.stuebner@vrull.eu>

T-Head C9xx cores implement an older version (0.7.1) of the vector
specification.

Relevant changes concerning the kernel are:
- different placement of the SR_VS bit for the vector unit status
- different encoding of the vsetvli instruction
- different instructions for loads and stores

And a fixed VLEN of 128.

The in-kernel access to vector instances is limited to the save and
restore of process states so the above mentioned areas can simply be
handled via the alternatives framework, similar to other T-Head specific
issues.

Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
---
 arch/riscv/Kconfig.errata            |  13 +++
 arch/riscv/errata/thead/errata.c     |  32 ++++++
 arch/riscv/include/asm/csr.h         |  24 ++++-
 arch/riscv/include/asm/errata_list.h |  45 ++++++++-
 arch/riscv/include/asm/vector.h      | 139 +++++++++++++++++++++++++--
 arch/riscv/kernel/vector.c           |   2 +-
 6 files changed, 238 insertions(+), 17 deletions(-)

Comments

kernel test robot June 23, 2023, 3:11 a.m. UTC | #1
Hi Heiko,

kernel test robot noticed the following build errors:

[auto build test ERROR on next-20230622]
[cannot apply to linus/master v6.4-rc7 v6.4-rc6 v6.4-rc5 v6.4-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Heiko-Stuebner/RISC-V-move-vector-available-status-into-a-dedicated-variable/20230623-081314
base:   next-20230622
patch link:    https://lore.kernel.org/r/20230622231305.631331-4-heiko%40sntech.de
patch subject: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling
config: riscv-randconfig-r042-20230622 (https://download.01.org/0day-ci/archive/20230623/202306231142.j8XLzSQL-lkp@intel.com/config)
compiler: riscv64-linux-gcc (GCC) 12.3.0
reproduce: (https://download.01.org/0day-ci/archive/20230623/202306231142.j8XLzSQL-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202306231142.j8XLzSQL-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from arch/riscv/include/asm/alternative.h:9,
                    from arch/riscv/include/asm/errata_list.h:8,
                    from arch/riscv/include/asm/tlbflush.h:12,
                    from arch/riscv/include/asm/pgtable.h:112,
                    from arch/riscv/include/asm/uaccess.h:12,
                    from include/linux/uaccess.h:11,
                    from include/linux/sched/task.h:11,
                    from include/linux/sched/signal.h:9,
                    from include/linux/rcuwait.h:6,
                    from include/linux/percpu-rwsem.h:7,
                    from include/linux/fs.h:33,
                    from include/linux/compat.h:17,
                    from arch/riscv/include/asm/elf.h:12,
                    from include/linux/elf.h:6,
                    from include/linux/module.h:19,
                    from include/linux/device/driver.h:21,
                    from include/linux/device.h:32,
                    from include/linux/energy_model.h:5,
                    from kernel/sched/fair.c:23:
   arch/riscv/include/asm/vector.h: In function '__riscv_v_vstate_save':
>> arch/riscv/include/asm/vector.h:206:17: error: expected ':' or ')' before 'THEAD_VSETVLI_T4X0E8M8D1'
     206 |                 THEAD_VSETVLI_T4X0E8M8D1
         |                 ^~~~~~~~~~~~~~~~~~~~~~~~
   arch/riscv/include/asm/alternative-macros.h:78:9: note: in definition of macro 'ALT_NEW_CONTENT'
      78 |         new_c "\n"                                                      \
         |         ^~~~~
   arch/riscv/include/asm/alternative-macros.h:104:9: note: in expansion of macro '__ALTERNATIVE_CFG'
     104 |         __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k))
         |         ^~~~~~~~~~~~~~~~~
   arch/riscv/include/asm/alternative-macros.h:152:9: note: in expansion of macro '_ALTERNATIVE_CFG'
     152 |         _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k)
         |         ^~~~~~~~~~~~~~~~
   arch/riscv/include/asm/vector.h:192:23: note: in expansion of macro 'ALTERNATIVE'
     192 |         asm volatile (ALTERNATIVE(
         |                       ^~~~~~~~~~~
   In file included from arch/riscv/include/asm/switch_to.h:11,
                    from kernel/sched/fair.c:51:
   arch/riscv/include/asm/vector.h:188:23: warning: unused variable 'vl' [-Wunused-variable]
     188 |         unsigned long vl;
         |                       ^~
   arch/riscv/include/asm/vector.h: In function '__riscv_v_vstate_restore':
   arch/riscv/include/asm/vector.h:239:17: error: expected ':' or ')' before 'THEAD_VSETVLI_T4X0E8M8D1'
     239 |                 THEAD_VSETVLI_T4X0E8M8D1
         |                 ^~~~~~~~~~~~~~~~~~~~~~~~
   arch/riscv/include/asm/alternative-macros.h:78:9: note: in definition of macro 'ALT_NEW_CONTENT'
      78 |         new_c "\n"                                                      \
         |         ^~~~~
   arch/riscv/include/asm/alternative-macros.h:104:9: note: in expansion of macro '__ALTERNATIVE_CFG'
     104 |         __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k))
         |         ^~~~~~~~~~~~~~~~~
   arch/riscv/include/asm/alternative-macros.h:152:9: note: in expansion of macro '_ALTERNATIVE_CFG'
     152 |         _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k)
         |         ^~~~~~~~~~~~~~~~
   arch/riscv/include/asm/vector.h:225:23: note: in expansion of macro 'ALTERNATIVE'
     225 |         asm volatile (ALTERNATIVE(
         |                       ^~~~~~~~~~~
   arch/riscv/include/asm/vector.h:222:23: warning: unused variable 'vl' [-Wunused-variable]
     222 |         unsigned long vl;
         |                       ^~
   kernel/sched/fair.c: At top level:
   kernel/sched/fair.c:688:5: warning: no previous prototype for 'sched_update_scaling' [-Wmissing-prototypes]
     688 | int sched_update_scaling(void)
         |     ^~~~~~~~~~~~~~~~~~~~
--
   In file included from arch/riscv/include/asm/alternative.h:9,
                    from arch/riscv/include/asm/errata_list.h:8,
                    from arch/riscv/include/asm/tlbflush.h:12,
                    from arch/riscv/include/asm/pgtable.h:112,
                    from arch/riscv/include/asm/uaccess.h:12,
                    from include/linux/uaccess.h:11,
                    from include/linux/sched/task.h:11,
                    from include/linux/sched/signal.h:9,
                    from include/linux/sched/cputime.h:5,
                    from kernel/sched/build_utility.c:13:
   arch/riscv/include/asm/vector.h: In function '__riscv_v_vstate_save':
>> arch/riscv/include/asm/vector.h:206:17: error: expected ':' or ')' before 'THEAD_VSETVLI_T4X0E8M8D1'
     206 |                 THEAD_VSETVLI_T4X0E8M8D1
         |                 ^~~~~~~~~~~~~~~~~~~~~~~~
   arch/riscv/include/asm/alternative-macros.h:78:9: note: in definition of macro 'ALT_NEW_CONTENT'
      78 |         new_c "\n"                                                      \
         |         ^~~~~
   arch/riscv/include/asm/alternative-macros.h:104:9: note: in expansion of macro '__ALTERNATIVE_CFG'
     104 |         __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k))
         |         ^~~~~~~~~~~~~~~~~
   arch/riscv/include/asm/alternative-macros.h:152:9: note: in expansion of macro '_ALTERNATIVE_CFG'
     152 |         _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k)
         |         ^~~~~~~~~~~~~~~~
   arch/riscv/include/asm/vector.h:192:23: note: in expansion of macro 'ALTERNATIVE'
     192 |         asm volatile (ALTERNATIVE(
         |                       ^~~~~~~~~~~
   In file included from arch/riscv/include/asm/switch_to.h:11,
                    from kernel/sched/build_utility.c:51:
   arch/riscv/include/asm/vector.h:188:23: warning: unused variable 'vl' [-Wunused-variable]
     188 |         unsigned long vl;
         |                       ^~
   arch/riscv/include/asm/vector.h: In function '__riscv_v_vstate_restore':
   arch/riscv/include/asm/vector.h:239:17: error: expected ':' or ')' before 'THEAD_VSETVLI_T4X0E8M8D1'
     239 |                 THEAD_VSETVLI_T4X0E8M8D1
         |                 ^~~~~~~~~~~~~~~~~~~~~~~~
   arch/riscv/include/asm/alternative-macros.h:78:9: note: in definition of macro 'ALT_NEW_CONTENT'
      78 |         new_c "\n"                                                      \
         |         ^~~~~
   arch/riscv/include/asm/alternative-macros.h:104:9: note: in expansion of macro '__ALTERNATIVE_CFG'
     104 |         __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k))
         |         ^~~~~~~~~~~~~~~~~
   arch/riscv/include/asm/alternative-macros.h:152:9: note: in expansion of macro '_ALTERNATIVE_CFG'
     152 |         _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k)
         |         ^~~~~~~~~~~~~~~~
   arch/riscv/include/asm/vector.h:225:23: note: in expansion of macro 'ALTERNATIVE'
     225 |         asm volatile (ALTERNATIVE(
         |                       ^~~~~~~~~~~
   arch/riscv/include/asm/vector.h:222:23: warning: unused variable 'vl' [-Wunused-variable]
     222 |         unsigned long vl;
         |                       ^~


vim +206 arch/riscv/include/asm/vector.h

   184	
   185	static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
   186						 void *datap)
   187	{
   188		unsigned long vl;
   189	
   190		riscv_v_enable();
   191		__vstate_csr_save(save_to);
   192		asm volatile (ALTERNATIVE(
   193			"nop\n\t"
   194			".option push\n\t"
   195			".option arch, +v\n\t"
   196			"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
   197			"vse8.v		v0, (%1)\n\t"
   198			"add		%1, %1, %0\n\t"
   199			"vse8.v		v8, (%1)\n\t"
   200			"add		%1, %1, %0\n\t"
   201			"vse8.v		v16, (%1)\n\t"
   202			"add		%1, %1, %0\n\t"
   203			"vse8.v		v24, (%1)\n\t"
   204			".option pop\n\t",
   205			"mv		t0, %1\n\t"
 > 206			THEAD_VSETVLI_T4X0E8M8D1
   207			THEAD_VSB_V_V0T0
   208			"addi		t0, t0, 128\n\t"
   209			THEAD_VSB_V_V8T0
   210			"addi		t0, t0, 128\n\t"
   211			THEAD_VSB_V_V16T0
   212			"addi		t0, t0, 128\n\t"
   213			THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
   214			ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
   215			: "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");
   216		riscv_v_disable();
   217	}
   218
Conor Dooley June 23, 2023, 9:49 a.m. UTC | #2
Hey Heiko,

On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote:
> From: Heiko Stuebner <heiko.stuebner@vrull.eu>
> 
> T-Head C9xx cores implement an older version (0.7.1) of the vector
> specification.
> 
> Relevant changes concerning the kernel are:
> - different placement of the SR_VS bit for the vector unit status
> - different encoding of the vsetvli instruction
> - different instructions for loads and stores
> 
> And a fixed VLEN of 128.
> 
> The in-kernel access to vector instances is limited to the save and
> restore of process states so the above mentioned areas can simply be
> handled via the alternatives framework, similar to other T-Head specific
> issues.
> 
> Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> ---
>  arch/riscv/Kconfig.errata            |  13 +++
>  arch/riscv/errata/thead/errata.c     |  32 ++++++
>  arch/riscv/include/asm/csr.h         |  24 ++++-
>  arch/riscv/include/asm/errata_list.h |  45 ++++++++-
>  arch/riscv/include/asm/vector.h      | 139 +++++++++++++++++++++++++--
>  arch/riscv/kernel/vector.c           |   2 +-
>  6 files changed, 238 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
> index 0c8f4652cd82..b461312dd452 100644
> --- a/arch/riscv/Kconfig.errata
> +++ b/arch/riscv/Kconfig.errata
> @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU
>  
>  	  If you don't know what to do here, say "Y".
>  
> +config ERRATA_THEAD_VECTOR
> +	bool "Apply T-Head Vector errata"
> +	depends on ERRATA_THEAD && RISCV_ISA_V
> +	default y
> +	help
> +	  The T-Head C9xx cores implement an earlier version 0.7.1
> +	  of the vector extensions.
> +
> +	  This will apply the necessary errata to handle the non-standard
> +	  behaviour via when switch to and from vector mode for processes.
> +
> +	  If you don't know what to do here, say "Y".
> +
>  endmenu # "CPU errata selection"
> diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> index c259dc925ec1..c41ec84bc8a5 100644
> --- a/arch/riscv/errata/thead/errata.c
> +++ b/arch/riscv/errata/thead/errata.c
> @@ -15,6 +15,7 @@
>  #include <asm/errata_list.h>
>  #include <asm/hwprobe.h>
>  #include <asm/patch.h>
> +#include <asm/vector.h>
>  #include <asm/vendorid_list.h>
>  
>  static bool errata_probe_pbmt(unsigned int stage,
> @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage,
>  	return true;
>  }
>  
> +static bool errata_probe_vector(unsigned int stage,
> +				unsigned long arch_id, unsigned long impid)
> +{
> +	if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR))
> +		return false;
> +
> +	/* target-c9xx cores report arch_id and impid as 0 */
> +	if (arch_id != 0 || impid != 0)
> +		return false;
> +
> +	if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) {
> +		/*
> +		 * Disable VECTOR to detect illegal usage of vector in kernel.
> +		 * This is normally done in _start_kernel but with the
> +		 * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's
> +		 * vector-0.7.1 and the vector-1.0-bits are unused there.
> +		 */
> +		csr_clear(CSR_STATUS, SR_VS_THEAD);
> +		return false;
> +	}
> +
> +	/* let has_vector() return true and set the static vlen */

Hmm, I was wondering about how you were going to communicate this to
userspace, since you're not going to be setting "v" in your DT, so
there'll be nothing in /proc/cpuinfo indicating it. (I am assuming that
this is your intention, as you'd not need to drop the alternative-based
stuff from has_vector() if it wasn't)

I don't think you can do this, as things stand, because of how hwprobe
operates:

static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
			     const struct cpumask *cpus)
{
	...

	if (has_vector())
		pair->value |= RISCV_HWPROBE_IMA_V;

	...
}

  * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by
    version 1.0 of the RISC-V Vector extension manual.

You'll need to change hwprobe to use has_vector() &&
riscv_has_extension_unlikely(v), or similar, as the condition for
reporting. You'll also need some other way to communicate to userspace
that T-Head's vector stuff is supported, no?

I'm also _really_ unconvinced that turning on extensions that were not
indicated in the DT or via ACPI is something we should be doing. Have I
missed something here that'd make that assessment inaccurate?

Cheers,
Conor.

FWIW I am currently working on kernel-side support for the new
extension properties that I have posted dt-binding patches for.
I'll go post it once Palmer has merged his current set of patches in his
staging repo into for-next, as I've got a lot of deps on riscv,isa
parser changes.
I'm really hoping that it provides an easier way to go off probing for
vendor specific stuff for DT-based systems, since it will no longer
require complex probing, just an of_property_match_string() for each
possible cpu and we could very well provide a vendor hook during that
process.
Clearly though, that stuff is not yet merged as it has not even been
posted yet.

Current WIP of that is here:
https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/log/?h=riscv-extensions-strings-on-palmer

> +	riscv_vector_supported();
> +	riscv_v_vsize = 128 / 8 * 32;
> +
> +	return true;
> +}
> +
>  static u32 thead_errata_probe(unsigned int stage,
>  			      unsigned long archid, unsigned long impid)
>  {
> @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage,
>  	if (errata_probe_pmu(stage, archid, impid))
>  		cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
>  
> +	if (errata_probe_vector(stage, archid, impid))
> +		cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR);
> +
>  	return cpu_req_errata;
>  }
>  
> diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> index 2d79bca6ffe8..521b3b939e51 100644
> --- a/arch/riscv/include/asm/csr.h
> +++ b/arch/riscv/include/asm/csr.h
> @@ -24,11 +24,25 @@
>  #define SR_FS_CLEAN	_AC(0x00004000, UL)
>  #define SR_FS_DIRTY	_AC(0x00006000, UL)
>  
> -#define SR_VS		_AC(0x00000600, UL) /* Vector Status */
> -#define SR_VS_OFF	_AC(0x00000000, UL)
> -#define SR_VS_INITIAL	_AC(0x00000200, UL)
> -#define SR_VS_CLEAN	_AC(0x00000400, UL)
> -#define SR_VS_DIRTY	_AC(0x00000600, UL)
> +#define SR_VS_OFF		_AC(0x00000000, UL)
> +
> +#define SR_VS_1_0		_AC(0x00000600, UL) /* Vector Status */
> +#define SR_VS_INITIAL_1_0	_AC(0x00000200, UL)
> +#define SR_VS_CLEAN_1_0		_AC(0x00000400, UL)
> +#define SR_VS_DIRTY_1_0		_AC(0x00000600, UL)
> +
> +#define SR_VS_THEAD		_AC(0x01800000, UL) /* Vector Status */
> +#define SR_VS_INITIAL_THEAD	_AC(0x00800000, UL)
> +#define SR_VS_CLEAN_THEAD	_AC(0x01000000, UL)
> +#define SR_VS_DIRTY_THEAD	_AC(0x01800000, UL)
> +
> +/*
> + * Always default to vector-1.0 handling in assembly and let the broken
> + * implementations handle their case separately.
> + */
> +#ifdef __ASSEMBLY__
> +#define SR_VS			SR_VS_1_0
> +#endif
>  
>  #define SR_XS		_AC(0x00018000, UL) /* Extension Status */
>  #define SR_XS_OFF	_AC(0x00000000, UL)
> diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> index fb1a810f3d8c..ab21fadbe9c6 100644
> --- a/arch/riscv/include/asm/errata_list.h
> +++ b/arch/riscv/include/asm/errata_list.h
> @@ -21,7 +21,8 @@
>  #define	ERRATA_THEAD_PBMT 0
>  #define	ERRATA_THEAD_CMO 1
>  #define	ERRATA_THEAD_PMU 2
> -#define	ERRATA_THEAD_NUMBER 3
> +#define	ERRATA_THEAD_VECTOR 3
> +#define	ERRATA_THEAD_NUMBER 4
>  #endif
>  
>  #ifdef __ASSEMBLY__
> @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE(						\
>  	: "=r" (__ovl) :						\
>  	: "memory")
>  
> +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> +
> +#define THEAD_C9XX_CSR_VXSAT			0x9
> +#define THEAD_C9XX_CSR_VXRM			0xa
> +
> +/*
> + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
> + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
> + * vsetvli	t4, x0, e8, m8, d1
> + */
> +#define THEAD_VSETVLI_T4X0E8M8D1	".long	0x00307ed7\n\t"
> +
> +/*
> + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
> + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize
> + * the call resulting in a different encoding and then using a value for
> + * the "mop" field that is not part of vector-0.7.1
> + * So encode specific variants for vstate_save and _restore.
> + */
> +#define THEAD_VSB_V_V0T0		".long	0x02028027\n\t"
> +#define THEAD_VSB_V_V8T0		".long	0x02028427\n\t"
> +#define THEAD_VSB_V_V16T0		".long	0x02028827\n\t"
> +#define THEAD_VSB_V_V24T0		".long	0x02028c27\n\t"
> +#define THEAD_VLB_V_V0T0		".long	0x012028007\n\t"
> +#define THEAD_VLB_V_V8T0		".long	0x012028407\n\t"
> +#define THEAD_VLB_V_V16T0		".long	0x012028807\n\t"
> +#define THEAD_VLB_V_V24T0		".long	0x012028c07\n\t"
> +
> +#define ALT_SR_VS_VECTOR_1_0_SHIFT	9
> +#define ALT_SR_VS_THEAD_SHIFT		23
> +
> +#define ALT_SR_VS(_val, prot)						\
> +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3",				\
> +		"li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID,		\
> +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)	\
> +		: "=r"(_val)						\
> +		: "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT),	\
> +		  "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT),		\
> +		  "I"(ALT_SR_VS_VECTOR_1_0_SHIFT),			\
> +		  "I"(ALT_SR_VS_THEAD_SHIFT))
> +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #endif
> diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
> index 315c96d2b4d0..fa47f60f81e3 100644
> --- a/arch/riscv/include/asm/vector.h
> +++ b/arch/riscv/include/asm/vector.h
> @@ -18,6 +18,55 @@
>  #include <asm/hwcap.h>
>  #include <asm/csr.h>
>  #include <asm/asm.h>
> +#include <asm/errata_list.h>
> +
> +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> +
> +static inline unsigned long riscv_sr_vs(void)
> +{
> +	u32 val;
> +
> +	ALT_SR_VS(val, SR_VS);
> +	return val;
> +}
> +
> +static inline unsigned long riscv_sr_vs_initial(void)
> +{
> +	u32 val;
> +
> +	ALT_SR_VS(val, SR_VS_INITIAL);
> +	return val;
> +}
> +
> +static inline unsigned long riscv_sr_vs_clean(void)
> +{
> +	u32 val;
> +
> +	ALT_SR_VS(val, SR_VS_CLEAN);
> +	return val;
> +}
> +
> +static inline unsigned long riscv_sr_vs_dirty(void)
> +{
> +	u32 val;
> +
> +	ALT_SR_VS(val, SR_VS_DIRTY);
> +	return val;
> +}
> +
> +#define SR_VS		riscv_sr_vs()
> +#define SR_VS_INITIAL	riscv_sr_vs_initial()
> +#define SR_VS_CLEAN	riscv_sr_vs_clean()
> +#define SR_VS_DIRTY	riscv_sr_vs_dirty()
> +
> +#else /* CONFIG_ERRATA_THEAD_VECTOR */
> +
> +#define SR_VS		SR_VS_1_0
> +#define SR_VS_INITIAL	SR_VS_INITIAL_1_0
> +#define SR_VS_CLEAN	SR_VS_CLEAN_1_0
> +#define SR_VS_DIRTY	SR_VS_DIRTY_1_0
> +
> +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
>  
>  extern bool riscv_v_supported;
>  void riscv_vector_supported(void);
> @@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void)
>  
>  static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
>  {
> -	asm volatile (
> +	register u32 t1 asm("t1") = (SR_FS);
> +
> +	/*
> +	 * CSR_VCSR is defined as
> +	 * [2:1] - vxrm[1:0]
> +	 * [0] - vxsat
> +	 * The earlier vector spec implemented by T-Head uses separate
> +	 * registers for the same bit-elements, so just combine those
> +	 * into the existing output field.
> +	 *
> +	 * Additionally T-Head cores need FS to be enabled when accessing
> +	 * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
> +	 * Though the cores do not implement the VXRM and VXSAT fields in the
> +	 * FCSR CSR that vector-0.7.1 specifies.
> +	 */
> +	asm volatile (ALTERNATIVE(
>  		"csrr	%0, " __stringify(CSR_VSTART) "\n\t"
>  		"csrr	%1, " __stringify(CSR_VTYPE) "\n\t"
>  		"csrr	%2, " __stringify(CSR_VL) "\n\t"
>  		"csrr	%3, " __stringify(CSR_VCSR) "\n\t"
> +		__nops(5),
> +		"csrs	sstatus, t1\n\t"
> +		"csrr	%0, " __stringify(CSR_VSTART) "\n\t"
> +		"csrr	%1, " __stringify(CSR_VTYPE) "\n\t"
> +		"csrr	%2, " __stringify(CSR_VL) "\n\t"
> +		"csrr	%3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t"
> +		"slliw	%3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
> +		"csrr	t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t"
> +		"or	%3, %3, t4\n\t"
> +		"csrc	sstatus, t1\n\t",
> +		THEAD_VENDOR_ID,
> +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
>  		: "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
> -		  "=r" (dest->vcsr) : :);
> +		  "=r" (dest->vcsr) : "r"(t1) : "t4");
>  }
>  
>  static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
>  {
> -	asm volatile (
> +	register u32 t1 asm("t1") = (SR_FS);
> +
> +	/*
> +	 * Similar to __vstate_csr_save above, restore values for the
> +	 * separate VXRM and VXSAT CSRs from the vcsr variable.
> +	 */
> +	asm volatile (ALTERNATIVE(
>  		".option push\n\t"
>  		".option arch, +v\n\t"
>  		"vsetvl	 x0, %2, %1\n\t"
>  		".option pop\n\t"
>  		"csrw	" __stringify(CSR_VSTART) ", %0\n\t"
>  		"csrw	" __stringify(CSR_VCSR) ", %3\n\t"
> +		__nops(6),
> +		"csrs	sstatus, t1\n\t"
> +		".option push\n\t"
> +		".option arch, +v\n\t"
> +		"vsetvl	 x0, %2, %1\n\t"
> +		".option pop\n\t"
> +		"csrw	" __stringify(CSR_VSTART) ", %0\n\t"
> +		"srliw	t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
> +		"andi	t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t"
> +		"csrw	" __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t"
> +		"andi	%3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t"
> +		"csrw	" __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t"
> +		"csrc	sstatus, t1\n\t",
> +		THEAD_VENDOR_ID,
> +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
>  		: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
> -		    "r" (src->vcsr) :);
> +		    "r" (src->vcsr), "r"(t1) : "t4");
>  }
>  
>  static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> @@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
>  
>  	riscv_v_enable();
>  	__vstate_csr_save(save_to);
> -	asm volatile (
> +	asm volatile (ALTERNATIVE(
> +		"nop\n\t"
>  		".option push\n\t"
>  		".option arch, +v\n\t"
>  		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
> @@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
>  		"vse8.v		v16, (%1)\n\t"
>  		"add		%1, %1, %0\n\t"
>  		"vse8.v		v24, (%1)\n\t"
> -		".option pop\n\t"
> -		: "=&r" (vl) : "r" (datap) : "memory");
> +		".option pop\n\t",
> +		"mv		t0, %1\n\t"
> +		THEAD_VSETVLI_T4X0E8M8D1
> +		THEAD_VSB_V_V0T0
> +		"addi		t0, t0, 128\n\t"
> +		THEAD_VSB_V_V8T0
> +		"addi		t0, t0, 128\n\t"
> +		THEAD_VSB_V_V16T0
> +		"addi		t0, t0, 128\n\t"
> +		THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
> +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> +		: "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");
>  	riscv_v_disable();
>  }
>  
> @@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
>  	unsigned long vl;
>  
>  	riscv_v_enable();
> -	asm volatile (
> +	asm volatile (ALTERNATIVE(
> +		"nop\n\t"
>  		".option push\n\t"
>  		".option arch, +v\n\t"
>  		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
> @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
>  		"vle8.v		v16, (%1)\n\t"
>  		"add		%1, %1, %0\n\t"
>  		"vle8.v		v24, (%1)\n\t"
> -		".option pop\n\t"
> -		: "=&r" (vl) : "r" (datap) : "memory");
> +		".option pop\n\t",
> +		"mv		t0, %1\n\t"
> +		THEAD_VSETVLI_T4X0E8M8D1
> +		THEAD_VLB_V_V0T0
> +		"addi		t0, t0, 128\n\t"
> +		THEAD_VLB_V_V8T0
> +		"addi		t0, t0, 128\n\t"
> +		THEAD_VLB_V_V16T0
> +		"addi		t0, t0, 128\n\t"
> +		THEAD_VLB_V_V24T0, THEAD_VENDOR_ID,
> +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> +		: "=&r" (vl) : "r" (datap) : "t0", "t4");
>  	__vstate_csr_restore(restore_from);
>  	riscv_v_disable();
>  }
> diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
> index 74178fb71805..51726890a4d0 100644
> --- a/arch/riscv/kernel/vector.c
> +++ b/arch/riscv/kernel/vector.c
> @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
>  	u32 insn = (u32)regs->badaddr;
>  
>  	/* Do not handle if V is not supported, or disabled */
> -	if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V))
> +	if (!has_vector())
>  		return false;
>  
>  	/* If V has been enabled then it is not the first-use trap */
> -- 
> 2.39.2
>
Heiko Stübner June 23, 2023, 10:40 a.m. UTC | #3
Hey Conor,

Am Freitag, 23. Juni 2023, 11:49:41 CEST schrieb Conor Dooley:
> On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote:
> > From: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > 
> > T-Head C9xx cores implement an older version (0.7.1) of the vector
> > specification.
> > 
> > Relevant changes concerning the kernel are:
> > - different placement of the SR_VS bit for the vector unit status
> > - different encoding of the vsetvli instruction
> > - different instructions for loads and stores
> > 
> > And a fixed VLEN of 128.
> > 
> > The in-kernel access to vector instances is limited to the save and
> > restore of process states so the above mentioned areas can simply be
> > handled via the alternatives framework, similar to other T-Head specific
> > issues.
> > 
> > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > ---
> >  arch/riscv/Kconfig.errata            |  13 +++
> >  arch/riscv/errata/thead/errata.c     |  32 ++++++
> >  arch/riscv/include/asm/csr.h         |  24 ++++-
> >  arch/riscv/include/asm/errata_list.h |  45 ++++++++-
> >  arch/riscv/include/asm/vector.h      | 139 +++++++++++++++++++++++++--
> >  arch/riscv/kernel/vector.c           |   2 +-
> >  6 files changed, 238 insertions(+), 17 deletions(-)
> > 
> > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
> > index 0c8f4652cd82..b461312dd452 100644
> > --- a/arch/riscv/Kconfig.errata
> > +++ b/arch/riscv/Kconfig.errata
> > @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU
> >  
> >  	  If you don't know what to do here, say "Y".
> >  
> > +config ERRATA_THEAD_VECTOR
> > +	bool "Apply T-Head Vector errata"
> > +	depends on ERRATA_THEAD && RISCV_ISA_V
> > +	default y
> > +	help
> > +	  The T-Head C9xx cores implement an earlier version 0.7.1
> > +	  of the vector extensions.
> > +
> > +	  This will apply the necessary errata to handle the non-standard
> > +	  behaviour via when switch to and from vector mode for processes.
> > +
> > +	  If you don't know what to do here, say "Y".
> > +
> >  endmenu # "CPU errata selection"
> > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> > index c259dc925ec1..c41ec84bc8a5 100644
> > --- a/arch/riscv/errata/thead/errata.c
> > +++ b/arch/riscv/errata/thead/errata.c
> > @@ -15,6 +15,7 @@
> >  #include <asm/errata_list.h>
> >  #include <asm/hwprobe.h>
> >  #include <asm/patch.h>
> > +#include <asm/vector.h>
> >  #include <asm/vendorid_list.h>
> >  
> >  static bool errata_probe_pbmt(unsigned int stage,
> > @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage,
> >  	return true;
> >  }
> >  
> > +static bool errata_probe_vector(unsigned int stage,
> > +				unsigned long arch_id, unsigned long impid)
> > +{
> > +	if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR))
> > +		return false;
> > +
> > +	/* target-c9xx cores report arch_id and impid as 0 */
> > +	if (arch_id != 0 || impid != 0)
> > +		return false;
> > +
> > +	if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) {
> > +		/*
> > +		 * Disable VECTOR to detect illegal usage of vector in kernel.
> > +		 * This is normally done in _start_kernel but with the
> > +		 * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's
> > +		 * vector-0.7.1 and the vector-1.0-bits are unused there.
> > +		 */
> > +		csr_clear(CSR_STATUS, SR_VS_THEAD);
> > +		return false;
> > +	}
> > +
> > +	/* let has_vector() return true and set the static vlen */
> 
> Hmm, I was wondering about how you were going to communicate this to
> userspace, since you're not going to be setting "v" in your DT, so
> there'll be nothing in /proc/cpuinfo indicating it. (I am assuming that
> this is your intention, as you'd not need to drop the alternative-based
> stuff from has_vector() if it wasn't)

I'm working on the assumption that the t-head vector is way to different
from the official vector, that a userspace will definitly need to handle this
in some way specially and we can't claim to use a "real" vector spec.

So in this first step, my goal is to simply allow userspace programs
compiled to use the t-head vector instructions (i.e. 0.7.1 presumably) to
not hang the kernel and do all the necessary bringup and teardown needed
for executing those vector instructions ;-) .


> I don't think you can do this, as things stand, because of how hwprobe
> operates:
> 
> static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
> 			     const struct cpumask *cpus)
> {
> 	...
> 
> 	if (has_vector())
> 		pair->value |= RISCV_HWPROBE_IMA_V;
> 
> 	...
> }
> 
>   * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by
>     version 1.0 of the RISC-V Vector extension manual.
> 
> You'll need to change hwprobe to use has_vector() &&
> riscv_has_extension_unlikely(v), or similar, as the condition for
> reporting.

ah right, and yes I need to adapt hwprobe as you wrote.


> You'll also need some other way to communicate to userspace
> that T-Head's vector stuff is supported, no?

As I said above, baby-steps - not-dying first ;-) .


> I'm also _really_ unconvinced that turning on extensions that were not
> indicated in the DT or via ACPI is something we should be doing. Have I
> missed something here that'd make that assessment inaccurate?

Hmm, DT (and ACPI) is a (static) hardware-description, not a configuration
space (sermon of DT maintainers for years), so the ISA string in DT will
simply describe _all_ extensions the hardware supports. So there _should_
never be a case of "I want to disable vectors and will remove the letter
from the ISA string".

For T-Head we _know_ from vendor-id and friends that the core supports
this special brand of vectors.

We're also turning on the t-head equivalent of svpbmt and zicbom with
probably the same reasoning.


> FWIW I am currently working on kernel-side support for the new
> extension properties that I have posted dt-binding patches for.
> I'll go post it once Palmer has merged his current set of patches in his
> staging repo into for-next, as I've got a lot of deps on riscv,isa
> parser changes.
> I'm really hoping that it provides an easier way to go off probing for
> vendor specific stuff for DT-based systems, since it will no longer
> require complex probing, just an of_property_match_string() for each
> possible cpu and we could very well provide a vendor hook during that
> process.
> Clearly though, that stuff is not yet merged as it has not even been
> posted yet.

And with the comments I received, T-Head vector also is not ready for
prime-time yet, so we're all good :-)


Heiko


> Current WIP of that is here:
> https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/log/?h=riscv-extensions-strings-on-palmer
> 
> > +	riscv_vector_supported();
> > +	riscv_v_vsize = 128 / 8 * 32;
> > +
> > +	return true;
> > +}
> > +
> >  static u32 thead_errata_probe(unsigned int stage,
> >  			      unsigned long archid, unsigned long impid)
> >  {
> > @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage,
> >  	if (errata_probe_pmu(stage, archid, impid))
> >  		cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
> >  
> > +	if (errata_probe_vector(stage, archid, impid))
> > +		cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR);
> > +
> >  	return cpu_req_errata;
> >  }
> >  
> > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> > index 2d79bca6ffe8..521b3b939e51 100644
> > --- a/arch/riscv/include/asm/csr.h
> > +++ b/arch/riscv/include/asm/csr.h
> > @@ -24,11 +24,25 @@
> >  #define SR_FS_CLEAN	_AC(0x00004000, UL)
> >  #define SR_FS_DIRTY	_AC(0x00006000, UL)
> >  
> > -#define SR_VS		_AC(0x00000600, UL) /* Vector Status */
> > -#define SR_VS_OFF	_AC(0x00000000, UL)
> > -#define SR_VS_INITIAL	_AC(0x00000200, UL)
> > -#define SR_VS_CLEAN	_AC(0x00000400, UL)
> > -#define SR_VS_DIRTY	_AC(0x00000600, UL)
> > +#define SR_VS_OFF		_AC(0x00000000, UL)
> > +
> > +#define SR_VS_1_0		_AC(0x00000600, UL) /* Vector Status */
> > +#define SR_VS_INITIAL_1_0	_AC(0x00000200, UL)
> > +#define SR_VS_CLEAN_1_0		_AC(0x00000400, UL)
> > +#define SR_VS_DIRTY_1_0		_AC(0x00000600, UL)
> > +
> > +#define SR_VS_THEAD		_AC(0x01800000, UL) /* Vector Status */
> > +#define SR_VS_INITIAL_THEAD	_AC(0x00800000, UL)
> > +#define SR_VS_CLEAN_THEAD	_AC(0x01000000, UL)
> > +#define SR_VS_DIRTY_THEAD	_AC(0x01800000, UL)
> > +
> > +/*
> > + * Always default to vector-1.0 handling in assembly and let the broken
> > + * implementations handle their case separately.
> > + */
> > +#ifdef __ASSEMBLY__
> > +#define SR_VS			SR_VS_1_0
> > +#endif
> >  
> >  #define SR_XS		_AC(0x00018000, UL) /* Extension Status */
> >  #define SR_XS_OFF	_AC(0x00000000, UL)
> > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> > index fb1a810f3d8c..ab21fadbe9c6 100644
> > --- a/arch/riscv/include/asm/errata_list.h
> > +++ b/arch/riscv/include/asm/errata_list.h
> > @@ -21,7 +21,8 @@
> >  #define	ERRATA_THEAD_PBMT 0
> >  #define	ERRATA_THEAD_CMO 1
> >  #define	ERRATA_THEAD_PMU 2
> > -#define	ERRATA_THEAD_NUMBER 3
> > +#define	ERRATA_THEAD_VECTOR 3
> > +#define	ERRATA_THEAD_NUMBER 4
> >  #endif
> >  
> >  #ifdef __ASSEMBLY__
> > @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE(						\
> >  	: "=r" (__ovl) :						\
> >  	: "memory")
> >  
> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> > +
> > +#define THEAD_C9XX_CSR_VXSAT			0x9
> > +#define THEAD_C9XX_CSR_VXRM			0xa
> > +
> > +/*
> > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
> > + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
> > + * vsetvli	t4, x0, e8, m8, d1
> > + */
> > +#define THEAD_VSETVLI_T4X0E8M8D1	".long	0x00307ed7\n\t"
> > +
> > +/*
> > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
> > + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize
> > + * the call resulting in a different encoding and then using a value for
> > + * the "mop" field that is not part of vector-0.7.1
> > + * So encode specific variants for vstate_save and _restore.
> > + */
> > +#define THEAD_VSB_V_V0T0		".long	0x02028027\n\t"
> > +#define THEAD_VSB_V_V8T0		".long	0x02028427\n\t"
> > +#define THEAD_VSB_V_V16T0		".long	0x02028827\n\t"
> > +#define THEAD_VSB_V_V24T0		".long	0x02028c27\n\t"
> > +#define THEAD_VLB_V_V0T0		".long	0x012028007\n\t"
> > +#define THEAD_VLB_V_V8T0		".long	0x012028407\n\t"
> > +#define THEAD_VLB_V_V16T0		".long	0x012028807\n\t"
> > +#define THEAD_VLB_V_V24T0		".long	0x012028c07\n\t"
> > +
> > +#define ALT_SR_VS_VECTOR_1_0_SHIFT	9
> > +#define ALT_SR_VS_THEAD_SHIFT		23
> > +
> > +#define ALT_SR_VS(_val, prot)						\
> > +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3",				\
> > +		"li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID,		\
> > +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)	\
> > +		: "=r"(_val)						\
> > +		: "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT),	\
> > +		  "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT),		\
> > +		  "I"(ALT_SR_VS_VECTOR_1_0_SHIFT),			\
> > +		  "I"(ALT_SR_VS_THEAD_SHIFT))
> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
> > +
> >  #endif /* __ASSEMBLY__ */
> >  
> >  #endif
> > diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
> > index 315c96d2b4d0..fa47f60f81e3 100644
> > --- a/arch/riscv/include/asm/vector.h
> > +++ b/arch/riscv/include/asm/vector.h
> > @@ -18,6 +18,55 @@
> >  #include <asm/hwcap.h>
> >  #include <asm/csr.h>
> >  #include <asm/asm.h>
> > +#include <asm/errata_list.h>
> > +
> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> > +
> > +static inline unsigned long riscv_sr_vs(void)
> > +{
> > +	u32 val;
> > +
> > +	ALT_SR_VS(val, SR_VS);
> > +	return val;
> > +}
> > +
> > +static inline unsigned long riscv_sr_vs_initial(void)
> > +{
> > +	u32 val;
> > +
> > +	ALT_SR_VS(val, SR_VS_INITIAL);
> > +	return val;
> > +}
> > +
> > +static inline unsigned long riscv_sr_vs_clean(void)
> > +{
> > +	u32 val;
> > +
> > +	ALT_SR_VS(val, SR_VS_CLEAN);
> > +	return val;
> > +}
> > +
> > +static inline unsigned long riscv_sr_vs_dirty(void)
> > +{
> > +	u32 val;
> > +
> > +	ALT_SR_VS(val, SR_VS_DIRTY);
> > +	return val;
> > +}
> > +
> > +#define SR_VS		riscv_sr_vs()
> > +#define SR_VS_INITIAL	riscv_sr_vs_initial()
> > +#define SR_VS_CLEAN	riscv_sr_vs_clean()
> > +#define SR_VS_DIRTY	riscv_sr_vs_dirty()
> > +
> > +#else /* CONFIG_ERRATA_THEAD_VECTOR */
> > +
> > +#define SR_VS		SR_VS_1_0
> > +#define SR_VS_INITIAL	SR_VS_INITIAL_1_0
> > +#define SR_VS_CLEAN	SR_VS_CLEAN_1_0
> > +#define SR_VS_DIRTY	SR_VS_DIRTY_1_0
> > +
> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
> >  
> >  extern bool riscv_v_supported;
> >  void riscv_vector_supported(void);
> > @@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void)
> >  
> >  static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
> >  {
> > -	asm volatile (
> > +	register u32 t1 asm("t1") = (SR_FS);
> > +
> > +	/*
> > +	 * CSR_VCSR is defined as
> > +	 * [2:1] - vxrm[1:0]
> > +	 * [0] - vxsat
> > +	 * The earlier vector spec implemented by T-Head uses separate
> > +	 * registers for the same bit-elements, so just combine those
> > +	 * into the existing output field.
> > +	 *
> > +	 * Additionally T-Head cores need FS to be enabled when accessing
> > +	 * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
> > +	 * Though the cores do not implement the VXRM and VXSAT fields in the
> > +	 * FCSR CSR that vector-0.7.1 specifies.
> > +	 */
> > +	asm volatile (ALTERNATIVE(
> >  		"csrr	%0, " __stringify(CSR_VSTART) "\n\t"
> >  		"csrr	%1, " __stringify(CSR_VTYPE) "\n\t"
> >  		"csrr	%2, " __stringify(CSR_VL) "\n\t"
> >  		"csrr	%3, " __stringify(CSR_VCSR) "\n\t"
> > +		__nops(5),
> > +		"csrs	sstatus, t1\n\t"
> > +		"csrr	%0, " __stringify(CSR_VSTART) "\n\t"
> > +		"csrr	%1, " __stringify(CSR_VTYPE) "\n\t"
> > +		"csrr	%2, " __stringify(CSR_VL) "\n\t"
> > +		"csrr	%3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t"
> > +		"slliw	%3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
> > +		"csrr	t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t"
> > +		"or	%3, %3, t4\n\t"
> > +		"csrc	sstatus, t1\n\t",
> > +		THEAD_VENDOR_ID,
> > +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> >  		: "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
> > -		  "=r" (dest->vcsr) : :);
> > +		  "=r" (dest->vcsr) : "r"(t1) : "t4");
> >  }
> >  
> >  static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
> >  {
> > -	asm volatile (
> > +	register u32 t1 asm("t1") = (SR_FS);
> > +
> > +	/*
> > +	 * Similar to __vstate_csr_save above, restore values for the
> > +	 * separate VXRM and VXSAT CSRs from the vcsr variable.
> > +	 */
> > +	asm volatile (ALTERNATIVE(
> >  		".option push\n\t"
> >  		".option arch, +v\n\t"
> >  		"vsetvl	 x0, %2, %1\n\t"
> >  		".option pop\n\t"
> >  		"csrw	" __stringify(CSR_VSTART) ", %0\n\t"
> >  		"csrw	" __stringify(CSR_VCSR) ", %3\n\t"
> > +		__nops(6),
> > +		"csrs	sstatus, t1\n\t"
> > +		".option push\n\t"
> > +		".option arch, +v\n\t"
> > +		"vsetvl	 x0, %2, %1\n\t"
> > +		".option pop\n\t"
> > +		"csrw	" __stringify(CSR_VSTART) ", %0\n\t"
> > +		"srliw	t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
> > +		"andi	t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t"
> > +		"csrw	" __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t"
> > +		"andi	%3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t"
> > +		"csrw	" __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t"
> > +		"csrc	sstatus, t1\n\t",
> > +		THEAD_VENDOR_ID,
> > +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> >  		: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
> > -		    "r" (src->vcsr) :);
> > +		    "r" (src->vcsr), "r"(t1) : "t4");
> >  }
> >  
> >  static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> > @@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> >  
> >  	riscv_v_enable();
> >  	__vstate_csr_save(save_to);
> > -	asm volatile (
> > +	asm volatile (ALTERNATIVE(
> > +		"nop\n\t"
> >  		".option push\n\t"
> >  		".option arch, +v\n\t"
> >  		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
> > @@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> >  		"vse8.v		v16, (%1)\n\t"
> >  		"add		%1, %1, %0\n\t"
> >  		"vse8.v		v24, (%1)\n\t"
> > -		".option pop\n\t"
> > -		: "=&r" (vl) : "r" (datap) : "memory");
> > +		".option pop\n\t",
> > +		"mv		t0, %1\n\t"
> > +		THEAD_VSETVLI_T4X0E8M8D1
> > +		THEAD_VSB_V_V0T0
> > +		"addi		t0, t0, 128\n\t"
> > +		THEAD_VSB_V_V8T0
> > +		"addi		t0, t0, 128\n\t"
> > +		THEAD_VSB_V_V16T0
> > +		"addi		t0, t0, 128\n\t"
> > +		THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
> > +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> > +		: "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");
> >  	riscv_v_disable();
> >  }
> >  
> > @@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
> >  	unsigned long vl;
> >  
> >  	riscv_v_enable();
> > -	asm volatile (
> > +	asm volatile (ALTERNATIVE(
> > +		"nop\n\t"
> >  		".option push\n\t"
> >  		".option arch, +v\n\t"
> >  		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
> > @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
> >  		"vle8.v		v16, (%1)\n\t"
> >  		"add		%1, %1, %0\n\t"
> >  		"vle8.v		v24, (%1)\n\t"
> > -		".option pop\n\t"
> > -		: "=&r" (vl) : "r" (datap) : "memory");
> > +		".option pop\n\t",
> > +		"mv		t0, %1\n\t"
> > +		THEAD_VSETVLI_T4X0E8M8D1
> > +		THEAD_VLB_V_V0T0
> > +		"addi		t0, t0, 128\n\t"
> > +		THEAD_VLB_V_V8T0
> > +		"addi		t0, t0, 128\n\t"
> > +		THEAD_VLB_V_V16T0
> > +		"addi		t0, t0, 128\n\t"
> > +		THEAD_VLB_V_V24T0, THEAD_VENDOR_ID,
> > +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> > +		: "=&r" (vl) : "r" (datap) : "t0", "t4");
> >  	__vstate_csr_restore(restore_from);
> >  	riscv_v_disable();
> >  }
> > diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
> > index 74178fb71805..51726890a4d0 100644
> > --- a/arch/riscv/kernel/vector.c
> > +++ b/arch/riscv/kernel/vector.c
> > @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
> >  	u32 insn = (u32)regs->badaddr;
> >  
> >  	/* Do not handle if V is not supported, or disabled */
> > -	if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V))
> > +	if (!has_vector())
> >  		return false;
> >  
> >  	/* If V has been enabled then it is not the first-use trap */
>
Conor Dooley June 23, 2023, 11:44 a.m. UTC | #4
On Fri, Jun 23, 2023 at 12:40:43PM +0200, Heiko Stübner wrote:
> Am Freitag, 23. Juni 2023, 11:49:41 CEST schrieb Conor Dooley:
> > On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote:
> > > From: Heiko Stuebner <heiko.stuebner@vrull.eu>

> > I'm also _really_ unconvinced that turning on extensions that were not
> > indicated in the DT or via ACPI is something we should be doing. Have I
> > missed something here that'd make that assessment inaccurate?
> 
> Hmm, DT (and ACPI) is a (static) hardware-description, not a configuration
> space (sermon of DT maintainers for years), so the ISA string in DT will
> simply describe _all_ extensions the hardware supports. So there _should_
> never be a case of "I want to disable vectors and will remove the letter
> from the ISA string".

I think I pointed it out previously, on the thread about using the isa
string in hwcap that you proposed, but it was things like hypervisors
that modify the DT that they pass to guests that I was talking about
here, rather than an end-user. Obviously this doesn't apply to things
that do not have hypervisor support, but if/when those do exist you'd be
relying on them not having the empty arch/impl ids.

> We're also turning on the t-head equivalent of svpbmt and zicbom with
> probably the same reasoning.

I'd argue that we should describe these things in whatever a non isa
string DT property ends up looking like, even if we missed the boat on
putting them in riscv,isa.

Maybe this is a self-serving interpretation, but I see the svpbmt and
zicbom equivalents somewhat differently. They're done under the hood,
ostensibly to make the thing spec compliant (it still claims to be
rv64gc). This one is "turn on a new, user-visible, feature", rather
than "we implement a standard thing, but it is broken, so silently fix
it up". I would probably feel differently about this aspect of things if
there was no intention to actually communicate the presence of the
extension to userspace.

> For T-Head we _know_ from vendor-id and friends that the core supports
> this special brand of vectors.

If we _know_ on Foobar SoC that it supports xyz extension based on
vendor_id etc, should we add detection for that that too, using those
as a basis? I really don't want to have a precedent for T-Head getting
to use this method (will the same logic apply to their bitmanip stuff?),
that is not going to be applied to other vendors.

Hopefully that better explains where I am coming from, lmk if I am
overlooking something that should be obvious.

Cheers,
Conor.
kernel test robot June 23, 2023, 1:47 p.m. UTC | #5
Hi Heiko,

kernel test robot noticed the following build errors:

[auto build test ERROR on next-20230622]
[cannot apply to linus/master v6.4-rc7 v6.4-rc6 v6.4-rc5 v6.4-rc7]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Heiko-Stuebner/RISC-V-move-vector-available-status-into-a-dedicated-variable/20230623-081314
base:   next-20230622
patch link:    https://lore.kernel.org/r/20230622231305.631331-4-heiko%40sntech.de
patch subject: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling
config: riscv-rv32_defconfig (https://download.01.org/0day-ci/archive/20230623/202306232111.5WpYab2n-lkp@intel.com/config)
compiler: riscv32-linux-gcc (GCC) 12.3.0
reproduce: (https://download.01.org/0day-ci/archive/20230623/202306232111.5WpYab2n-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202306232111.5WpYab2n-lkp@intel.com/

All errors (new ones prefixed by >>):

   arch/riscv/include/asm/vector.h: Assembler messages:
>> arch/riscv/include/asm/vector.h:162: Error: unrecognized opcode `slliw a4,a4,1'
>> arch/riscv/include/asm/vector.h:194: Error: unrecognized opcode `srliw t4,a1,1'
>> arch/riscv/include/asm/vector.h:169: Error: attempt to move .org backwards
   arch/riscv/include/asm/vector.h:203: Error: attempt to move .org backwards
--
   arch/riscv/include/asm/vector.h: Assembler messages:
>> arch/riscv/include/asm/vector.h:162: Error: unrecognized opcode `slliw a4,a4,1'
>> arch/riscv/include/asm/vector.h:169: Error: attempt to move .org backwards
--
   arch/riscv/include/asm/vector.h: Assembler messages:
>> arch/riscv/include/asm/vector.h:194: Error: unrecognized opcode `srliw t4,a1,1'
>> arch/riscv/include/asm/vector.h:162: Error: unrecognized opcode `slliw a4,a4,1'
   arch/riscv/include/asm/vector.h:203: Error: attempt to move .org backwards
>> arch/riscv/include/asm/vector.h:169: Error: attempt to move .org backwards


vim +162 arch/riscv/include/asm/vector.h

03c3fcd9941a17 Greentime Hu   2023-06-05  150  
03c3fcd9941a17 Greentime Hu   2023-06-05  151  static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
03c3fcd9941a17 Greentime Hu   2023-06-05  152  {
5255e253b722bb Heiko Stuebner 2023-06-23  153  	register u32 t1 asm("t1") = (SR_FS);
5255e253b722bb Heiko Stuebner 2023-06-23  154  
5255e253b722bb Heiko Stuebner 2023-06-23  155  	/*
5255e253b722bb Heiko Stuebner 2023-06-23  156  	 * Similar to __vstate_csr_save above, restore values for the
5255e253b722bb Heiko Stuebner 2023-06-23  157  	 * separate VXRM and VXSAT CSRs from the vcsr variable.
5255e253b722bb Heiko Stuebner 2023-06-23  158  	 */
5255e253b722bb Heiko Stuebner 2023-06-23  159  	asm volatile (ALTERNATIVE(
03c3fcd9941a17 Greentime Hu   2023-06-05  160  		".option push\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05  161  		".option arch, +v\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05 @162  		"vsetvl	 x0, %2, %1\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05  163  		".option pop\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05  164  		"csrw	" __stringify(CSR_VSTART) ", %0\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05  165  		"csrw	" __stringify(CSR_VCSR) ", %3\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  166  		__nops(6),
5255e253b722bb Heiko Stuebner 2023-06-23  167  		"csrs	sstatus, t1\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  168  		".option push\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23 @169  		".option arch, +v\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  170  		"vsetvl	 x0, %2, %1\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  171  		".option pop\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  172  		"csrw	" __stringify(CSR_VSTART) ", %0\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  173  		"srliw	t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  174  		"andi	t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  175  		"csrw	" __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  176  		"andi	%3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  177  		"csrw	" __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  178  		"csrc	sstatus, t1\n\t",
5255e253b722bb Heiko Stuebner 2023-06-23  179  		THEAD_VENDOR_ID,
5255e253b722bb Heiko Stuebner 2023-06-23  180  		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
03c3fcd9941a17 Greentime Hu   2023-06-05  181  		: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
5255e253b722bb Heiko Stuebner 2023-06-23  182  		    "r" (src->vcsr), "r"(t1) : "t4");
03c3fcd9941a17 Greentime Hu   2023-06-05  183  }
03c3fcd9941a17 Greentime Hu   2023-06-05  184  
03c3fcd9941a17 Greentime Hu   2023-06-05  185  static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
03c3fcd9941a17 Greentime Hu   2023-06-05  186  					 void *datap)
03c3fcd9941a17 Greentime Hu   2023-06-05  187  {
03c3fcd9941a17 Greentime Hu   2023-06-05  188  	unsigned long vl;
03c3fcd9941a17 Greentime Hu   2023-06-05  189  
03c3fcd9941a17 Greentime Hu   2023-06-05  190  	riscv_v_enable();
03c3fcd9941a17 Greentime Hu   2023-06-05  191  	__vstate_csr_save(save_to);
5255e253b722bb Heiko Stuebner 2023-06-23  192  	asm volatile (ALTERNATIVE(
5255e253b722bb Heiko Stuebner 2023-06-23  193  		"nop\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05 @194  		".option push\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05  195  		".option arch, +v\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05  196  		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05  197  		"vse8.v		v0, (%1)\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05  198  		"add		%1, %1, %0\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05  199  		"vse8.v		v8, (%1)\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05  200  		"add		%1, %1, %0\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05  201  		"vse8.v		v16, (%1)\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05  202  		"add		%1, %1, %0\n\t"
03c3fcd9941a17 Greentime Hu   2023-06-05  203  		"vse8.v		v24, (%1)\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  204  		".option pop\n\t",
5255e253b722bb Heiko Stuebner 2023-06-23  205  		"mv		t0, %1\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  206  		THEAD_VSETVLI_T4X0E8M8D1
5255e253b722bb Heiko Stuebner 2023-06-23  207  		THEAD_VSB_V_V0T0
5255e253b722bb Heiko Stuebner 2023-06-23  208  		"addi		t0, t0, 128\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  209  		THEAD_VSB_V_V8T0
5255e253b722bb Heiko Stuebner 2023-06-23  210  		"addi		t0, t0, 128\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  211  		THEAD_VSB_V_V16T0
5255e253b722bb Heiko Stuebner 2023-06-23  212  		"addi		t0, t0, 128\n\t"
5255e253b722bb Heiko Stuebner 2023-06-23  213  		THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
5255e253b722bb Heiko Stuebner 2023-06-23  214  		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
5255e253b722bb Heiko Stuebner 2023-06-23  215  		: "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");
03c3fcd9941a17 Greentime Hu   2023-06-05  216  	riscv_v_disable();
03c3fcd9941a17 Greentime Hu   2023-06-05  217  }
03c3fcd9941a17 Greentime Hu   2023-06-05  218
Stefan O'Rear June 24, 2023, 5:18 a.m. UTC | #6
On Fri, Jun 23, 2023, at 6:40 AM, Heiko Stübner wrote:
> Hey Conor,
>
> Am Freitag, 23. Juni 2023, 11:49:41 CEST schrieb Conor Dooley:
>> On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote:
>> > From: Heiko Stuebner <heiko.stuebner@vrull.eu>
>> > 
>> > T-Head C9xx cores implement an older version (0.7.1) of the vector
>> > specification.
>> > 
>> > Relevant changes concerning the kernel are:
>> > - different placement of the SR_VS bit for the vector unit status
>> > - different encoding of the vsetvli instruction
>> > - different instructions for loads and stores
>> > 
>> > And a fixed VLEN of 128.
>> > 
>> > The in-kernel access to vector instances is limited to the save and
>> > restore of process states so the above mentioned areas can simply be
>> > handled via the alternatives framework, similar to other T-Head specific
>> > issues.
>> > 
>> > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
>> > ---
>> >  arch/riscv/Kconfig.errata            |  13 +++
>> >  arch/riscv/errata/thead/errata.c     |  32 ++++++
>> >  arch/riscv/include/asm/csr.h         |  24 ++++-
>> >  arch/riscv/include/asm/errata_list.h |  45 ++++++++-
>> >  arch/riscv/include/asm/vector.h      | 139 +++++++++++++++++++++++++--
>> >  arch/riscv/kernel/vector.c           |   2 +-
>> >  6 files changed, 238 insertions(+), 17 deletions(-)
>> > 
>> > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
>> > index 0c8f4652cd82..b461312dd452 100644
>> > --- a/arch/riscv/Kconfig.errata
>> > +++ b/arch/riscv/Kconfig.errata
>> > @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU
>> >  
>> >  	  If you don't know what to do here, say "Y".
>> >  
>> > +config ERRATA_THEAD_VECTOR
>> > +	bool "Apply T-Head Vector errata"
>> > +	depends on ERRATA_THEAD && RISCV_ISA_V
>> > +	default y
>> > +	help
>> > +	  The T-Head C9xx cores implement an earlier version 0.7.1
>> > +	  of the vector extensions.
>> > +
>> > +	  This will apply the necessary errata to handle the non-standard
>> > +	  behaviour via when switch to and from vector mode for processes.

Doesn't make sense.  "This will apply the necessary errata to handle the
non-standard behavior when enabling, disabling, or swapping vector state for
processes."?

>> > +
>> > +	  If you don't know what to do here, say "Y".
>> > +
>> >  endmenu # "CPU errata selection"
>> > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
>> > index c259dc925ec1..c41ec84bc8a5 100644
>> > --- a/arch/riscv/errata/thead/errata.c
>> > +++ b/arch/riscv/errata/thead/errata.c
>> > @@ -15,6 +15,7 @@
>> >  #include <asm/errata_list.h>
>> >  #include <asm/hwprobe.h>
>> >  #include <asm/patch.h>
>> > +#include <asm/vector.h>
>> >  #include <asm/vendorid_list.h>
>> >  
>> >  static bool errata_probe_pbmt(unsigned int stage,
>> > @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage,
>> >  	return true;
>> >  }
>> >  
>> > +static bool errata_probe_vector(unsigned int stage,
>> > +				unsigned long arch_id, unsigned long impid)
>> > +{
>> > +	if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR))
>> > +		return false;
>> > +
>> > +	/* target-c9xx cores report arch_id and impid as 0 */
>> > +	if (arch_id != 0 || impid != 0)
>> > +		return false;
>> > +
>> > +	if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) {
>> > +		/*
>> > +		 * Disable VECTOR to detect illegal usage of vector in kernel.
>> > +		 * This is normally done in _start_kernel but with the
>> > +		 * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's
>> > +		 * vector-0.7.1 and the vector-1.0-bits are unused there.
>> > +		 */
>> > +		csr_clear(CSR_STATUS, SR_VS_THEAD);
>> > +		return false;
>> > +	}
>> > +
>> > +	/* let has_vector() return true and set the static vlen */
>> 
>> Hmm, I was wondering about how you were going to communicate this to
>> userspace, since you're not going to be setting "v" in your DT, so
>> there'll be nothing in /proc/cpuinfo indicating it. (I am assuming that
>> this is your intention, as you'd not need to drop the alternative-based
>> stuff from has_vector() if it wasn't)
>
> I'm working on the assumption that the t-head vector is way to different
> from the official vector, that a userspace will definitly need to handle this
> in some way specially and we can't claim to use a "real" vector spec.
>
> So in this first step, my goal is to simply allow userspace programs
> compiled to use the t-head vector instructions (i.e. 0.7.1 presumably) to
> not hang the kernel and do all the necessary bringup and teardown needed
> for executing those vector instructions ;-) .
>
>
>> I don't think you can do this, as things stand, because of how hwprobe
>> operates:
>> 
>> static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
>> 			     const struct cpumask *cpus)
>> {
>> 	...
>> 
>> 	if (has_vector())
>> 		pair->value |= RISCV_HWPROBE_IMA_V;
>> 
>> 	...
>> }
>> 
>>   * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by
>>     version 1.0 of the RISC-V Vector extension manual.
>> 
>> You'll need to change hwprobe to use has_vector() &&
>> riscv_has_extension_unlikely(v), or similar, as the condition for
>> reporting.
>
> ah right, and yes I need to adapt hwprobe as you wrote.
>
>
>> You'll also need some other way to communicate to userspace
>> that T-Head's vector stuff is supported, no?
>
> As I said above, baby-steps - not-dying first ;-) .

(Count me as a vote for adding a new RISCV_HWPROBE_KEY_ - ints are cheap and
apart from the half-implemented heterogenous case, the only advantage of
hwprobe over hwcap is that we can support a virtually unlimited number of
draft and vendor extensions.)

>> I'm also _really_ unconvinced that turning on extensions that were not
>> indicated in the DT or via ACPI is something we should be doing. Have I
>> missed something here that'd make that assessment inaccurate?
>
> Hmm, DT (and ACPI) is a (static) hardware-description, not a configuration
> space (sermon of DT maintainers for years), so the ISA string in DT will
> simply describe _all_ extensions the hardware supports. So there _should_
> never be a case of "I want to disable vectors and will remove the letter
> from the ISA string".
>
> For T-Head we _know_ from vendor-id and friends that the core supports
> this special brand of vectors.

C906 supports t-head/0.7.1 vectors as a configuration option.  The C906 in
the D1 and BL808 has vectors, the recently announced CV1800B has one C906
with vectors and one without, and I vaguely remember seeing a chip with only
a non-vector C906.

C908 (announced, no manual yet) claims V 1.0 support.  Presumably it will
not support 0.7.1.

C910 (exists on evaluation boards) lacks vector support.

C920 (TH1520, SG2042, etc) has 0.7.1 support, at least superficially
compatible with C906-with-vectors.  Hopefully we can share errata.

This probably needs to be handled as an orthogonal "xtheadv" or "v0p7p1"
extension in whatever replaces riscv,isa.

> We're also turning on the t-head equivalent of svpbmt and zicbom with
> probably the same reasoning.

In an ideal world those would be handled as extensions as well - T-Head fixed
their vectors with the C908 so they might do standards-compliant Svpbmt and
Zicbom in the future.

>> FWIW I am currently working on kernel-side support for the new
>> extension properties that I have posted dt-binding patches for.
>> I'll go post it once Palmer has merged his current set of patches in his
>> staging repo into for-next, as I've got a lot of deps on riscv,isa
>> parser changes.
>> I'm really hoping that it provides an easier way to go off probing for
>> vendor specific stuff for DT-based systems, since it will no longer
>> require complex probing, just an of_property_match_string() for each
>> possible cpu and we could very well provide a vendor hook during that
>> process.
>> Clearly though, that stuff is not yet merged as it has not even been
>> posted yet.
>
> And with the comments I received, T-Head vector also is not ready for
> prime-time yet, so we're all good :-)
>
>
> Heiko
>
>
>> Current WIP of that is here:
>> https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/log/?h=riscv-extensions-strings-on-palmer
>> 
>> > +	riscv_vector_supported();
>> > +	riscv_v_vsize = 128 / 8 * 32;
>> > +
>> > +	return true;
>> > +}
>> > +
>> >  static u32 thead_errata_probe(unsigned int stage,
>> >  			      unsigned long archid, unsigned long impid)
>> >  {
>> > @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage,
>> >  	if (errata_probe_pmu(stage, archid, impid))
>> >  		cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
>> >  
>> > +	if (errata_probe_vector(stage, archid, impid))
>> > +		cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR);
>> > +
>> >  	return cpu_req_errata;
>> >  }
>> >  
>> > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
>> > index 2d79bca6ffe8..521b3b939e51 100644
>> > --- a/arch/riscv/include/asm/csr.h
>> > +++ b/arch/riscv/include/asm/csr.h
>> > @@ -24,11 +24,25 @@
>> >  #define SR_FS_CLEAN	_AC(0x00004000, UL)
>> >  #define SR_FS_DIRTY	_AC(0x00006000, UL)
>> >  
>> > -#define SR_VS		_AC(0x00000600, UL) /* Vector Status */
>> > -#define SR_VS_OFF	_AC(0x00000000, UL)
>> > -#define SR_VS_INITIAL	_AC(0x00000200, UL)
>> > -#define SR_VS_CLEAN	_AC(0x00000400, UL)
>> > -#define SR_VS_DIRTY	_AC(0x00000600, UL)
>> > +#define SR_VS_OFF		_AC(0x00000000, UL)
>> > +
>> > +#define SR_VS_1_0		_AC(0x00000600, UL) /* Vector Status */
>> > +#define SR_VS_INITIAL_1_0	_AC(0x00000200, UL)
>> > +#define SR_VS_CLEAN_1_0		_AC(0x00000400, UL)
>> > +#define SR_VS_DIRTY_1_0		_AC(0x00000600, UL)
>> > +
>> > +#define SR_VS_THEAD		_AC(0x01800000, UL) /* Vector Status */
>> > +#define SR_VS_INITIAL_THEAD	_AC(0x00800000, UL)
>> > +#define SR_VS_CLEAN_THEAD	_AC(0x01000000, UL)
>> > +#define SR_VS_DIRTY_THEAD	_AC(0x01800000, UL)
>> > +
>> > +/*
>> > + * Always default to vector-1.0 handling in assembly and let the broken
>> > + * implementations handle their case separately.
>> > + */
>> > +#ifdef __ASSEMBLY__
>> > +#define SR_VS			SR_VS_1_0
>> > +#endif
>> >  
>> >  #define SR_XS		_AC(0x00018000, UL) /* Extension Status */
>> >  #define SR_XS_OFF	_AC(0x00000000, UL)
>> > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
>> > index fb1a810f3d8c..ab21fadbe9c6 100644
>> > --- a/arch/riscv/include/asm/errata_list.h
>> > +++ b/arch/riscv/include/asm/errata_list.h
>> > @@ -21,7 +21,8 @@
>> >  #define	ERRATA_THEAD_PBMT 0
>> >  #define	ERRATA_THEAD_CMO 1
>> >  #define	ERRATA_THEAD_PMU 2
>> > -#define	ERRATA_THEAD_NUMBER 3
>> > +#define	ERRATA_THEAD_VECTOR 3
>> > +#define	ERRATA_THEAD_NUMBER 4
>> >  #endif
>> >  
>> >  #ifdef __ASSEMBLY__
>> > @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE(						\
>> >  	: "=r" (__ovl) :						\
>> >  	: "memory")
>> >  
>> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR
>> > +
>> > +#define THEAD_C9XX_CSR_VXSAT			0x9
>> > +#define THEAD_C9XX_CSR_VXRM			0xa

These CSR numbers haven't changed.  Assuming that we actually need to handle
vxsat and vxrm as part of the vector state (if t-head decided to make them
controlled by sstatus.VS despite being in fcsr), why not unconditionally
define CSR_VXSAT and CSR_VXRM in csr.h?

>> > +
>> > +/*
>> > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
>> > + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
>> > + * vsetvli	t4, x0, e8, m8, d1
>> > + */
>> > +#define THEAD_VSETVLI_T4X0E8M8D1	".long	0x00307ed7\n\t"
>> > +
>> > +/*
>> > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
>> > + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize
>> > + * the call resulting in a different encoding and then using a value for
>> > + * the "mop" field that is not part of vector-0.7.1
>> > + * So encode specific variants for vstate_save and _restore.
>> > + */
>> > +#define THEAD_VSB_V_V0T0		".long	0x02028027\n\t"
>> > +#define THEAD_VSB_V_V8T0		".long	0x02028427\n\t"
>> > +#define THEAD_VSB_V_V16T0		".long	0x02028827\n\t"
>> > +#define THEAD_VSB_V_V24T0		".long	0x02028c27\n\t"
>> > +#define THEAD_VLB_V_V0T0		".long	0x012028007\n\t"
>> > +#define THEAD_VLB_V_V8T0		".long	0x012028407\n\t"
>> > +#define THEAD_VLB_V_V16T0		".long	0x012028807\n\t"
>> > +#define THEAD_VLB_V_V24T0		".long	0x012028c07\n\t"

.insn isn't supported by the kernel's minimum binutils version, but it _is_
supported by the oldest version of binutils that can assemble rvv 1.0
instructions.  OP_V requires 2.39 so I use a literal 0x57 instead.

very untested, and I leave it to your judgement whether it actually improves
readability:

#define THEAD_VSETVLI_T4X0E8M8D1  ".insn i 0x57, 7, t4, x0, 3\n\t"
#define THEAD_VSB_V_V0T0		".insn r STORE_FP, 0, 1, x0,  t0, x0\n\t"
#define THEAD_VSB_V_V8T0		".insn r STORE_FP, 0, 1, x8,  t0, x0\n\t"
#define THEAD_VSB_V_V16T0		".insn r STORE_FP, 0, 1, x16, t0, x0\n\t"
#define THEAD_VSB_V_V24T0		".insn r STORE_FP, 0, 1, x24, t0, x0\n\t"
#define THEAD_VSB_V_V0T0		".insn r LOAD_FP,  0, 9, x0,  t0, x0\n\t"
#define THEAD_VSB_V_V8T0		".insn r LOAD_FP,  0, 9, x8,  t0, x0\n\t"
#define THEAD_VSB_V_V16T0		".insn r LOAD_FP,  0, 9, x16, t0, x0\n\t"
#define THEAD_VSB_V_V24T0		".insn r LOAD_FP,  0, 9, x24, t0, x0\n\t"

>> > +
>> > +#define ALT_SR_VS_VECTOR_1_0_SHIFT	9
>> > +#define ALT_SR_VS_THEAD_SHIFT		23
>> > +
>> > +#define ALT_SR_VS(_val, prot)						\
>> > +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3",				\
>> > +		"li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID,		\
>> > +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)	\
>> > +		: "=r"(_val)						\
>> > +		: "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT),	\
>> > +		  "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT),		\
>> > +		  "I"(ALT_SR_VS_VECTOR_1_0_SHIFT),			\
>> > +		  "I"(ALT_SR_VS_THEAD_SHIFT))

I think this can be simplified by removing the shifts and using the li
pseudoinstruction (which will become lui on the _THEAD_ arm).

>> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
>> > +
>> >  #endif /* __ASSEMBLY__ */
>> >  
>> >  #endif
>> > diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
>> > index 315c96d2b4d0..fa47f60f81e3 100644
>> > --- a/arch/riscv/include/asm/vector.h
>> > +++ b/arch/riscv/include/asm/vector.h
>> > @@ -18,6 +18,55 @@
>> >  #include <asm/hwcap.h>
>> >  #include <asm/csr.h>
>> >  #include <asm/asm.h>
>> > +#include <asm/errata_list.h>
>> > +
>> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR
>> > +
>> > +static inline unsigned long riscv_sr_vs(void)
>> > +{
>> > +	u32 val;
>> > +
>> > +	ALT_SR_VS(val, SR_VS);
>> > +	return val;
>> > +}
>> > +
>> > +static inline unsigned long riscv_sr_vs_initial(void)
>> > +{
>> > +	u32 val;
>> > +
>> > +	ALT_SR_VS(val, SR_VS_INITIAL);
>> > +	return val;
>> > +}
>> > +
>> > +static inline unsigned long riscv_sr_vs_clean(void)
>> > +{
>> > +	u32 val;
>> > +
>> > +	ALT_SR_VS(val, SR_VS_CLEAN);
>> > +	return val;
>> > +}
>> > +
>> > +static inline unsigned long riscv_sr_vs_dirty(void)
>> > +{
>> > +	u32 val;
>> > +
>> > +	ALT_SR_VS(val, SR_VS_DIRTY);
>> > +	return val;
>> > +}
>> > +
>> > +#define SR_VS		riscv_sr_vs()
>> > +#define SR_VS_INITIAL	riscv_sr_vs_initial()
>> > +#define SR_VS_CLEAN	riscv_sr_vs_clean()
>> > +#define SR_VS_DIRTY	riscv_sr_vs_dirty()
>> > +
>> > +#else /* CONFIG_ERRATA_THEAD_VECTOR */
>> > +
>> > +#define SR_VS		SR_VS_1_0
>> > +#define SR_VS_INITIAL	SR_VS_INITIAL_1_0
>> > +#define SR_VS_CLEAN	SR_VS_CLEAN_1_0
>> > +#define SR_VS_DIRTY	SR_VS_DIRTY_1_0
>> > +
>> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
>> >  
>> >  extern bool riscv_v_supported;
>> >  void riscv_vector_supported(void);
>> > @@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void)
>> >  
>> >  static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
>> >  {
>> > -	asm volatile (
>> > +	register u32 t1 asm("t1") = (SR_FS);
>> > +
>> > +	/*
>> > +	 * CSR_VCSR is defined as
>> > +	 * [2:1] - vxrm[1:0]
>> > +	 * [0] - vxsat
>> > +	 * The earlier vector spec implemented by T-Head uses separate
>> > +	 * registers for the same bit-elements, so just combine those
>> > +	 * into the existing output field.
>> > +	 *
>> > +	 * Additionally T-Head cores need FS to be enabled when accessing
>> > +	 * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
>> > +	 * Though the cores do not implement the VXRM and VXSAT fields in the
>> > +	 * FCSR CSR that vector-0.7.1 specifies.

(for completeness only: this was found to be inaccurate in the previous thread)

>> > +	 */
>> > +	asm volatile (ALTERNATIVE(
>> >  		"csrr	%0, " __stringify(CSR_VSTART) "\n\t"
>> >  		"csrr	%1, " __stringify(CSR_VTYPE) "\n\t"
>> >  		"csrr	%2, " __stringify(CSR_VL) "\n\t"
>> >  		"csrr	%3, " __stringify(CSR_VCSR) "\n\t"
>> > +		__nops(5),
>> > +		"csrs	sstatus, t1\n\t"
>> > +		"csrr	%0, " __stringify(CSR_VSTART) "\n\t"
>> > +		"csrr	%1, " __stringify(CSR_VTYPE) "\n\t"
>> > +		"csrr	%2, " __stringify(CSR_VL) "\n\t"
>> > +		"csrr	%3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t"
>> > +		"slliw	%3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
>> > +		"csrr	t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t"
>> > +		"or	%3, %3, t4\n\t"
>> > +		"csrc	sstatus, t1\n\t",
>> > +		THEAD_VENDOR_ID,
>> > +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
>> >  		: "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
>> > -		  "=r" (dest->vcsr) : :);
>> > +		  "=r" (dest->vcsr) : "r"(t1) : "t4");
>> >  }
>> >  
>> >  static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
>> >  {
>> > -	asm volatile (
>> > +	register u32 t1 asm("t1") = (SR_FS);
>> > +
>> > +	/*
>> > +	 * Similar to __vstate_csr_save above, restore values for the
>> > +	 * separate VXRM and VXSAT CSRs from the vcsr variable.
>> > +	 */
>> > +	asm volatile (ALTERNATIVE(
>> >  		".option push\n\t"
>> >  		".option arch, +v\n\t"
>> >  		"vsetvl	 x0, %2, %1\n\t"
>> >  		".option pop\n\t"
>> >  		"csrw	" __stringify(CSR_VSTART) ", %0\n\t"
>> >  		"csrw	" __stringify(CSR_VCSR) ", %3\n\t"
>> > +		__nops(6),
>> > +		"csrs	sstatus, t1\n\t"
>> > +		".option push\n\t"
>> > +		".option arch, +v\n\t"
>> > +		"vsetvl	 x0, %2, %1\n\t"
>> > +		".option pop\n\t"
>> > +		"csrw	" __stringify(CSR_VSTART) ", %0\n\t"
>> > +		"srliw	t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
>> > +		"andi	t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t"
>> > +		"csrw	" __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t"
>> > +		"andi	%3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t"
>> > +		"csrw	" __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t"
>> > +		"csrc	sstatus, t1\n\t",
>> > +		THEAD_VENDOR_ID,
>> > +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
>> >  		: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
>> > -		    "r" (src->vcsr) :);
>> > +		    "r" (src->vcsr), "r"(t1) : "t4");
>> >  }
>> >  
>> >  static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
>> > @@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
>> >  
>> >  	riscv_v_enable();
>> >  	__vstate_csr_save(save_to);
>> > -	asm volatile (
>> > +	asm volatile (ALTERNATIVE(
>> > +		"nop\n\t"
>> >  		".option push\n\t"
>> >  		".option arch, +v\n\t"
>> >  		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
>> > @@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
>> >  		"vse8.v		v16, (%1)\n\t"
>> >  		"add		%1, %1, %0\n\t"
>> >  		"vse8.v		v24, (%1)\n\t"
>> > -		".option pop\n\t"
>> > -		: "=&r" (vl) : "r" (datap) : "memory");

Pre-existing bug: The assembly code modifies %1, which is an input operand.
I think this should be

-		: "=&r" (vl), "+r" (datap) : : "memory");

>> > +		".option pop\n\t",
>> > +		"mv		t0, %1\n\t"
>> > +		THEAD_VSETVLI_T4X0E8M8D1
>> > +		THEAD_VSB_V_V0T0
>> > +		"addi		t0, t0, 128\n\t"

We don't have a promise from T-Head that they won't introduce a core with
0.7.1 vectors and VLEN=256, and I'd rather not have code lying around that
will cause silent data corruption if that happens.  THEAD_VSETVLI_T4X0E8M8D1
has rd=t4 so adding t4 should work in this arm.

>> > +		THEAD_VSB_V_V8T0
>> > +		"addi		t0, t0, 128\n\t"
>> > +		THEAD_VSB_V_V16T0
>> > +		"addi		t0, t0, 128\n\t"
>> > +		THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
>> > +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
>> > +		: "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");

The bugfix for the existing code isn't needed here since you copied the
address into t0.

>> >  	riscv_v_disable();
>> >  }
>> >  
>> > @@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
>> >  	unsigned long vl;
>> >  
>> >  	riscv_v_enable();
>> > -	asm volatile (
>> > +	asm volatile (ALTERNATIVE(
>> > +		"nop\n\t"
>> >  		".option push\n\t"
>> >  		".option arch, +v\n\t"
>> >  		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
>> > @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
>> >  		"vle8.v		v16, (%1)\n\t"
>> >  		"add		%1, %1, %0\n\t"
>> >  		"vle8.v		v24, (%1)\n\t"
>> > -		".option pop\n\t"
>> > -		: "=&r" (vl) : "r" (datap) : "memory");

-		: "=&r" (vl), "+r" (datap) : : "memory");

-s

>> > +		".option pop\n\t",
>> > +		"mv		t0, %1\n\t"
>> > +		THEAD_VSETVLI_T4X0E8M8D1
>> > +		THEAD_VLB_V_V0T0
>> > +		"addi		t0, t0, 128\n\t"
>> > +		THEAD_VLB_V_V8T0
>> > +		"addi		t0, t0, 128\n\t"
>> > +		THEAD_VLB_V_V16T0
>> > +		"addi		t0, t0, 128\n\t"
>> > +		THEAD_VLB_V_V24T0, THEAD_VENDOR_ID,
>> > +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
>> > +		: "=&r" (vl) : "r" (datap) : "t0", "t4");
>> >  	__vstate_csr_restore(restore_from);
>> >  	riscv_v_disable();
>> >  }
>> > diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
>> > index 74178fb71805..51726890a4d0 100644
>> > --- a/arch/riscv/kernel/vector.c
>> > +++ b/arch/riscv/kernel/vector.c
>> > @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
>> >  	u32 insn = (u32)regs->badaddr;
>> >  
>> >  	/* Do not handle if V is not supported, or disabled */
>> > -	if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V))
>> > +	if (!has_vector())
>> >  		return false;
>> >  
>> >  	/* If V has been enabled then it is not the first-use trap */
>> 
>
>
>
>
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Andrew Jones June 24, 2023, 10:59 a.m. UTC | #7
On Sat, Jun 24, 2023 at 01:18:26AM -0400, Stefan O'Rear wrote:
> On Fri, Jun 23, 2023, at 6:40 AM, Heiko Stübner wrote:
...
> >> > +
> >> > +/*
> >> > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
> >> > + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
> >> > + * vsetvli	t4, x0, e8, m8, d1
> >> > + */
> >> > +#define THEAD_VSETVLI_T4X0E8M8D1	".long	0x00307ed7\n\t"
> >> > +
> >> > +/*
> >> > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
> >> > + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize
> >> > + * the call resulting in a different encoding and then using a value for
> >> > + * the "mop" field that is not part of vector-0.7.1
> >> > + * So encode specific variants for vstate_save and _restore.
> >> > + */
> >> > +#define THEAD_VSB_V_V0T0		".long	0x02028027\n\t"
> >> > +#define THEAD_VSB_V_V8T0		".long	0x02028427\n\t"
> >> > +#define THEAD_VSB_V_V16T0		".long	0x02028827\n\t"
> >> > +#define THEAD_VSB_V_V24T0		".long	0x02028c27\n\t"
> >> > +#define THEAD_VLB_V_V0T0		".long	0x012028007\n\t"
> >> > +#define THEAD_VLB_V_V8T0		".long	0x012028407\n\t"
> >> > +#define THEAD_VLB_V_V16T0		".long	0x012028807\n\t"
> >> > +#define THEAD_VLB_V_V24T0		".long	0x012028c07\n\t"
> 
> .insn isn't supported by the kernel's minimum binutils version, but it _is_
> supported by the oldest version of binutils that can assemble rvv 1.0
> instructions.  OP_V requires 2.39 so I use a literal 0x57 instead.
> 
> very untested, and I leave it to your judgement whether it actually improves
> readability:
> 
> #define THEAD_VSETVLI_T4X0E8M8D1  ".insn i 0x57, 7, t4, x0, 3\n\t"
> #define THEAD_VSB_V_V0T0		".insn r STORE_FP, 0, 1, x0,  t0, x0\n\t"
> #define THEAD_VSB_V_V8T0		".insn r STORE_FP, 0, 1, x8,  t0, x0\n\t"
> #define THEAD_VSB_V_V16T0		".insn r STORE_FP, 0, 1, x16, t0, x0\n\t"
> #define THEAD_VSB_V_V24T0		".insn r STORE_FP, 0, 1, x24, t0, x0\n\t"
> #define THEAD_VSB_V_V0T0		".insn r LOAD_FP,  0, 9, x0,  t0, x0\n\t"
> #define THEAD_VSB_V_V8T0		".insn r LOAD_FP,  0, 9, x8,  t0, x0\n\t"
> #define THEAD_VSB_V_V16T0		".insn r LOAD_FP,  0, 9, x16, t0, x0\n\t"
> #define THEAD_VSB_V_V24T0		".insn r LOAD_FP,  0, 9, x24, t0, x0\n\t"
>

We have the INSN_R() macro in arch/riscv/include/asm/insn-def.h for stuff
like this.

Thanks,
drew
Rémi Denis-Courmont June 27, 2023, 3:21 p.m. UTC | #8
Le perjantaina 23. kesäkuuta 2023, 2.13.05 EEST Heiko Stuebner a écrit :
> From: Heiko Stuebner <heiko.stuebner@vrull.eu>
> 
> T-Head C9xx cores implement an older version (0.7.1) of the vector
> specification.
> 
> Relevant changes concerning the kernel are:
> - different placement of the SR_VS bit for the vector unit status
> - different encoding of the vsetvli instruction
> - different instructions for loads and stores
> 
> And a fixed VLEN of 128.

Ultimately, conformant hardware also has a fixed VLEN of some value.

So why is that relevant here? is the vlenb CSR not implemented? And even if 
so, c the value not be retrieved with vsetvli?
Conor Dooley June 27, 2023, 4:12 p.m. UTC | #9
On Tue, Jun 27, 2023 at 06:21:14PM +0300, Rémi Denis-Courmont wrote:
> Le perjantaina 23. kesäkuuta 2023, 2.13.05 EEST Heiko Stuebner a écrit :
> > From: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > 
> > T-Head C9xx cores implement an older version (0.7.1) of the vector
> > specification.
> > 
> > Relevant changes concerning the kernel are:
> > - different placement of the SR_VS bit for the vector unit status
> > - different encoding of the vsetvli instruction
> > - different instructions for loads and stores
> > 
> > And a fixed VLEN of 128.
> 
> Ultimately, conformant hardware also has a fixed VLEN of some value.
> 
> So why is that relevant here? is the vlenb CSR not implemented? And even if 
> so, c the value not be retrieved with vsetvli?

I was looking around a bit, and saw a random comment on reddit today
claiming that the c920 has a vlen of 256. Obviously that conflicts with
what is written here, but it is reddit...
Do you know if that is true Heiko, and if it is true, does the c920
populate archid/impid with non-zero values?

Cheers,
Conor.
Palmer Dabbelt June 28, 2023, 2:23 p.m. UTC | #10
On Tue, 27 Jun 2023 09:12:48 PDT (-0700), Conor Dooley wrote:
> On Tue, Jun 27, 2023 at 06:21:14PM +0300, Rémi Denis-Courmont wrote:
>> Le perjantaina 23. kesäkuuta 2023, 2.13.05 EEST Heiko Stuebner a écrit :
>> > From: Heiko Stuebner <heiko.stuebner@vrull.eu>
>> >
>> > T-Head C9xx cores implement an older version (0.7.1) of the vector
>> > specification.
>> >
>> > Relevant changes concerning the kernel are:
>> > - different placement of the SR_VS bit for the vector unit status
>> > - different encoding of the vsetvli instruction
>> > - different instructions for loads and stores
>> >
>> > And a fixed VLEN of 128.
>>
>> Ultimately, conformant hardware also has a fixed VLEN of some value.
>>
>> So why is that relevant here? is the vlenb CSR not implemented? And even if
>> so, c the value not be retrieved with vsetvli?
>
> I was looking around a bit, and saw a random comment on reddit today
> claiming that the c920 has a vlen of 256. Obviously that conflicts with
> what is written here, but it is reddit...
> Do you know if that is true Heiko, and if it is true, does the c920
> populate archid/impid with non-zero values?

We were talking in the patchwork a bit, it looks like there's already 
some aliasing in the T-Head implementations where the same
marchid/mvendorid/mimplid tuple has different extensions.  At that 
point we can't really detected based on the CSRs, so it sort of doesn't 
matter what the other stuff does (at least around V, if there's some 
other errata that's uniquely identified by the tuple then we can deal 
with it).

So I think we need to add a way of indicating the hardware supports 
T-Head V from the device tree.  I'd asked Charlie to add hwprobe support 
for the T-Head V stuff, LMK if we should pick up the DT side of things 
as well?

>
> Cheers,
> Conor.
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Andy Chiu June 28, 2023, 4:07 p.m. UTC | #11
On Fri, Jun 23, 2023 at 5:53 PM Conor Dooley <conor.dooley@microchip.com> wrote:
>
> Hey Heiko,
>
> On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote:
> > From: Heiko Stuebner <heiko.stuebner@vrull.eu>
> >
> > T-Head C9xx cores implement an older version (0.7.1) of the vector
> > specification.
> >
> > Relevant changes concerning the kernel are:
> > - different placement of the SR_VS bit for the vector unit status
> > - different encoding of the vsetvli instruction
> > - different instructions for loads and stores
> >
> > And a fixed VLEN of 128.
> >
> > The in-kernel access to vector instances is limited to the save and
> > restore of process states so the above mentioned areas can simply be
> > handled via the alternatives framework, similar to other T-Head specific
> > issues.
> >
> > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
> > ---
> >  arch/riscv/Kconfig.errata            |  13 +++
> >  arch/riscv/errata/thead/errata.c     |  32 ++++++
> >  arch/riscv/include/asm/csr.h         |  24 ++++-
> >  arch/riscv/include/asm/errata_list.h |  45 ++++++++-
> >  arch/riscv/include/asm/vector.h      | 139 +++++++++++++++++++++++++--
> >  arch/riscv/kernel/vector.c           |   2 +-
> >  6 files changed, 238 insertions(+), 17 deletions(-)
> >
> > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
> > index 0c8f4652cd82..b461312dd452 100644
> > --- a/arch/riscv/Kconfig.errata
> > +++ b/arch/riscv/Kconfig.errata
> > @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU
> >
> >         If you don't know what to do here, say "Y".
> >
> > +config ERRATA_THEAD_VECTOR
> > +     bool "Apply T-Head Vector errata"
> > +     depends on ERRATA_THEAD && RISCV_ISA_V
> > +     default y
> > +     help
> > +       The T-Head C9xx cores implement an earlier version 0.7.1
> > +       of the vector extensions.
> > +
> > +       This will apply the necessary errata to handle the non-standard
> > +       behaviour via when switch to and from vector mode for processes.
> > +
> > +       If you don't know what to do here, say "Y".
> > +
> >  endmenu # "CPU errata selection"
> > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
> > index c259dc925ec1..c41ec84bc8a5 100644
> > --- a/arch/riscv/errata/thead/errata.c
> > +++ b/arch/riscv/errata/thead/errata.c
> > @@ -15,6 +15,7 @@
> >  #include <asm/errata_list.h>
> >  #include <asm/hwprobe.h>
> >  #include <asm/patch.h>
> > +#include <asm/vector.h>
> >  #include <asm/vendorid_list.h>
> >
> >  static bool errata_probe_pbmt(unsigned int stage,
> > @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage,
> >       return true;
> >  }
> >
> > +static bool errata_probe_vector(unsigned int stage,
> > +                             unsigned long arch_id, unsigned long impid)
> > +{
> > +     if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR))
> > +             return false;
> > +
> > +     /* target-c9xx cores report arch_id and impid as 0 */
> > +     if (arch_id != 0 || impid != 0)
> > +             return false;
> > +
> > +     if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) {
> > +             /*
> > +              * Disable VECTOR to detect illegal usage of vector in kernel.
> > +              * This is normally done in _start_kernel but with the
> > +              * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's
> > +              * vector-0.7.1 and the vector-1.0-bits are unused there.
> > +              */
> > +             csr_clear(CSR_STATUS, SR_VS_THEAD);
> > +             return false;
> > +     }
> > +
> > +     /* let has_vector() return true and set the static vlen */
>
> Hmm, I was wondering about how you were going to communicate this to
> userspace, since you're not going to be setting "v" in your DT, so
> there'll be nothing in /proc/cpuinfo indicating it. (I am assuming that
> this is your intention, as you'd not need to drop the alternative-based
> stuff from has_vector() if it wasn't)
>
> I don't think you can do this, as things stand, because of how hwprobe
> operates:
>
> static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
>                              const struct cpumask *cpus)
> {
>         ...
>
>         if (has_vector())
>                 pair->value |= RISCV_HWPROBE_IMA_V;
>
>         ...
> }
>
>   * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by
>     version 1.0 of the RISC-V Vector extension manual.
>
> You'll need to change hwprobe to use has_vector() &&
> riscv_has_extension_unlikely(v), or similar, as the condition for
> reporting. You'll also need some other way to communicate to userspace
> that T-Head's vector stuff is supported, no?
>
> I'm also _really_ unconvinced that turning on extensions that were not
> indicated in the DT or via ACPI is something we should be doing. Have I
> missed something here that'd make that assessment inaccurate?
>
> Cheers,
> Conor.
>
> FWIW I am currently working on kernel-side support for the new
> extension properties that I have posted dt-binding patches for.
> I'll go post it once Palmer has merged his current set of patches in his
> staging repo into for-next, as I've got a lot of deps on riscv,isa
> parser changes.
> I'm really hoping that it provides an easier way to go off probing for
> vendor specific stuff for DT-based systems, since it will no longer
> require complex probing, just an of_property_match_string() for each
> possible cpu and we could very well provide a vendor hook during that
> process.
> Clearly though, that stuff is not yet merged as it has not even been
> posted yet.
>
> Current WIP of that is here:
> https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/log/?h=riscv-extensions-strings-on-palmer
>
> > +     riscv_vector_supported();
> > +     riscv_v_vsize = 128 / 8 * 32;
> > +
> > +     return true;
> > +}
> > +
> >  static u32 thead_errata_probe(unsigned int stage,
> >                             unsigned long archid, unsigned long impid)
> >  {
> > @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage,
> >       if (errata_probe_pmu(stage, archid, impid))
> >               cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
> >
> > +     if (errata_probe_vector(stage, archid, impid))
> > +             cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR);
> > +
> >       return cpu_req_errata;
> >  }
> >
> > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
> > index 2d79bca6ffe8..521b3b939e51 100644
> > --- a/arch/riscv/include/asm/csr.h
> > +++ b/arch/riscv/include/asm/csr.h
> > @@ -24,11 +24,25 @@
> >  #define SR_FS_CLEAN  _AC(0x00004000, UL)
> >  #define SR_FS_DIRTY  _AC(0x00006000, UL)
> >
> > -#define SR_VS                _AC(0x00000600, UL) /* Vector Status */
> > -#define SR_VS_OFF    _AC(0x00000000, UL)
> > -#define SR_VS_INITIAL        _AC(0x00000200, UL)
> > -#define SR_VS_CLEAN  _AC(0x00000400, UL)
> > -#define SR_VS_DIRTY  _AC(0x00000600, UL)
> > +#define SR_VS_OFF            _AC(0x00000000, UL)
> > +
> > +#define SR_VS_1_0            _AC(0x00000600, UL) /* Vector Status */
> > +#define SR_VS_INITIAL_1_0    _AC(0x00000200, UL)
> > +#define SR_VS_CLEAN_1_0              _AC(0x00000400, UL)
> > +#define SR_VS_DIRTY_1_0              _AC(0x00000600, UL)
> > +
> > +#define SR_VS_THEAD          _AC(0x01800000, UL) /* Vector Status */
> > +#define SR_VS_INITIAL_THEAD  _AC(0x00800000, UL)
> > +#define SR_VS_CLEAN_THEAD    _AC(0x01000000, UL)
> > +#define SR_VS_DIRTY_THEAD    _AC(0x01800000, UL)
> > +
> > +/*
> > + * Always default to vector-1.0 handling in assembly and let the broken
> > + * implementations handle their case separately.
> > + */
> > +#ifdef __ASSEMBLY__
> > +#define SR_VS                        SR_VS_1_0
> > +#endif
> >
> >  #define SR_XS                _AC(0x00018000, UL) /* Extension Status */
> >  #define SR_XS_OFF    _AC(0x00000000, UL)
> > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> > index fb1a810f3d8c..ab21fadbe9c6 100644
> > --- a/arch/riscv/include/asm/errata_list.h
> > +++ b/arch/riscv/include/asm/errata_list.h
> > @@ -21,7 +21,8 @@
> >  #define      ERRATA_THEAD_PBMT 0
> >  #define      ERRATA_THEAD_CMO 1
> >  #define      ERRATA_THEAD_PMU 2
> > -#define      ERRATA_THEAD_NUMBER 3
> > +#define      ERRATA_THEAD_VECTOR 3
> > +#define      ERRATA_THEAD_NUMBER 4
> >  #endif
> >
> >  #ifdef __ASSEMBLY__
> > @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE(                                                \
> >       : "=r" (__ovl) :                                                \
> >       : "memory")
> >
> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> > +
> > +#define THEAD_C9XX_CSR_VXSAT                 0x9
> > +#define THEAD_C9XX_CSR_VXRM                  0xa
> > +
> > +/*
> > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
> > + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
> > + * vsetvli   t4, x0, e8, m8, d1
> > + */
> > +#define THEAD_VSETVLI_T4X0E8M8D1     ".long  0x00307ed7\n\t"
> > +
> > +/*
> > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
> > + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize
> > + * the call resulting in a different encoding and then using a value for
> > + * the "mop" field that is not part of vector-0.7.1
> > + * So encode specific variants for vstate_save and _restore.
> > + */
> > +#define THEAD_VSB_V_V0T0             ".long  0x02028027\n\t"
> > +#define THEAD_VSB_V_V8T0             ".long  0x02028427\n\t"
> > +#define THEAD_VSB_V_V16T0            ".long  0x02028827\n\t"
> > +#define THEAD_VSB_V_V24T0            ".long  0x02028c27\n\t"
> > +#define THEAD_VLB_V_V0T0             ".long  0x012028007\n\t"
> > +#define THEAD_VLB_V_V8T0             ".long  0x012028407\n\t"
> > +#define THEAD_VLB_V_V16T0            ".long  0x012028807\n\t"
> > +#define THEAD_VLB_V_V24T0            ".long  0x012028c07\n\t"
> > +
> > +#define ALT_SR_VS_VECTOR_1_0_SHIFT   9
> > +#define ALT_SR_VS_THEAD_SHIFT                23
> > +
> > +#define ALT_SR_VS(_val, prot)                                                \
> > +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3",                                \
> > +             "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID,          \
> > +             ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)        \
> > +             : "=r"(_val)                                            \
> > +             : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT),        \
> > +               "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT),           \
> > +               "I"(ALT_SR_VS_VECTOR_1_0_SHIFT),                      \
> > +               "I"(ALT_SR_VS_THEAD_SHIFT))
> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
> > +
> >  #endif /* __ASSEMBLY__ */
> >
> >  #endif
> > diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
> > index 315c96d2b4d0..fa47f60f81e3 100644
> > --- a/arch/riscv/include/asm/vector.h
> > +++ b/arch/riscv/include/asm/vector.h
> > @@ -18,6 +18,55 @@
> >  #include <asm/hwcap.h>
> >  #include <asm/csr.h>
> >  #include <asm/asm.h>
> > +#include <asm/errata_list.h>
> > +
> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> > +
> > +static inline unsigned long riscv_sr_vs(void)
> > +{
> > +     u32 val;
> > +
> > +     ALT_SR_VS(val, SR_VS);
> > +     return val;
> > +}
> > +
> > +static inline unsigned long riscv_sr_vs_initial(void)
> > +{
> > +     u32 val;
> > +
> > +     ALT_SR_VS(val, SR_VS_INITIAL);
> > +     return val;
> > +}
> > +
> > +static inline unsigned long riscv_sr_vs_clean(void)
> > +{
> > +     u32 val;
> > +
> > +     ALT_SR_VS(val, SR_VS_CLEAN);
> > +     return val;
> > +}
> > +
> > +static inline unsigned long riscv_sr_vs_dirty(void)
> > +{
> > +     u32 val;
> > +
> > +     ALT_SR_VS(val, SR_VS_DIRTY);
> > +     return val;
> > +}
> > +
> > +#define SR_VS                riscv_sr_vs()
> > +#define SR_VS_INITIAL        riscv_sr_vs_initial()
> > +#define SR_VS_CLEAN  riscv_sr_vs_clean()
> > +#define SR_VS_DIRTY  riscv_sr_vs_dirty()
> > +
> > +#else /* CONFIG_ERRATA_THEAD_VECTOR */
> > +
> > +#define SR_VS                SR_VS_1_0
> > +#define SR_VS_INITIAL        SR_VS_INITIAL_1_0
> > +#define SR_VS_CLEAN  SR_VS_CLEAN_1_0
> > +#define SR_VS_DIRTY  SR_VS_DIRTY_1_0
> > +
> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
> >
> >  extern bool riscv_v_supported;
> >  void riscv_vector_supported(void);
> > @@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void)
> >
> >  static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
> >  {
> > -     asm volatile (
> > +     register u32 t1 asm("t1") = (SR_FS);
> > +
> > +     /*
> > +      * CSR_VCSR is defined as
> > +      * [2:1] - vxrm[1:0]
> > +      * [0] - vxsat
> > +      * The earlier vector spec implemented by T-Head uses separate
> > +      * registers for the same bit-elements, so just combine those
> > +      * into the existing output field.
> > +      *
> > +      * Additionally T-Head cores need FS to be enabled when accessing
> > +      * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
> > +      * Though the cores do not implement the VXRM and VXSAT fields in the
> > +      * FCSR CSR that vector-0.7.1 specifies.
> > +      */
> > +     asm volatile (ALTERNATIVE(
> >               "csrr   %0, " __stringify(CSR_VSTART) "\n\t"
> >               "csrr   %1, " __stringify(CSR_VTYPE) "\n\t"
> >               "csrr   %2, " __stringify(CSR_VL) "\n\t"
> >               "csrr   %3, " __stringify(CSR_VCSR) "\n\t"
> > +             __nops(5),
> > +             "csrs   sstatus, t1\n\t"
> > +             "csrr   %0, " __stringify(CSR_VSTART) "\n\t"
> > +             "csrr   %1, " __stringify(CSR_VTYPE) "\n\t"
> > +             "csrr   %2, " __stringify(CSR_VL) "\n\t"
> > +             "csrr   %3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t"
> > +             "slliw  %3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
> > +             "csrr   t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t"
> > +             "or     %3, %3, t4\n\t"
> > +             "csrc   sstatus, t1\n\t",
> > +             THEAD_VENDOR_ID,
> > +             ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> >               : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
> > -               "=r" (dest->vcsr) : :);
> > +               "=r" (dest->vcsr) : "r"(t1) : "t4");
> >  }
> >
> >  static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
> >  {
> > -     asm volatile (
> > +     register u32 t1 asm("t1") = (SR_FS);
> > +
> > +     /*
> > +      * Similar to __vstate_csr_save above, restore values for the
> > +      * separate VXRM and VXSAT CSRs from the vcsr variable.
> > +      */
> > +     asm volatile (ALTERNATIVE(
> >               ".option push\n\t"
> >               ".option arch, +v\n\t"
> >               "vsetvl  x0, %2, %1\n\t"
> >               ".option pop\n\t"
> >               "csrw   " __stringify(CSR_VSTART) ", %0\n\t"
> >               "csrw   " __stringify(CSR_VCSR) ", %3\n\t"
> > +             __nops(6),
> > +             "csrs   sstatus, t1\n\t"
> > +             ".option push\n\t"
> > +             ".option arch, +v\n\t"
> > +             "vsetvl  x0, %2, %1\n\t"
> > +             ".option pop\n\t"
> > +             "csrw   " __stringify(CSR_VSTART) ", %0\n\t"
> > +             "srliw  t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
> > +             "andi   t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t"
> > +             "csrw   " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t"
> > +             "andi   %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t"
> > +             "csrw   " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t"
> > +             "csrc   sstatus, t1\n\t",
> > +             THEAD_VENDOR_ID,
> > +             ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> >               : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
> > -                 "r" (src->vcsr) :);
> > +                 "r" (src->vcsr), "r"(t1) : "t4");
> >  }
> >
> >  static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> > @@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> >
> >       riscv_v_enable();
> >       __vstate_csr_save(save_to);
> > -     asm volatile (
> > +     asm volatile (ALTERNATIVE(
> > +             "nop\n\t"
> >               ".option push\n\t"
> >               ".option arch, +v\n\t"
> >               "vsetvli        %0, x0, e8, m8, ta, ma\n\t"
> > @@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
> >               "vse8.v         v16, (%1)\n\t"
> >               "add            %1, %1, %0\n\t"
> >               "vse8.v         v24, (%1)\n\t"
> > -             ".option pop\n\t"
> > -             : "=&r" (vl) : "r" (datap) : "memory");
> > +             ".option pop\n\t",
> > +             "mv             t0, %1\n\t"
> > +             THEAD_VSETVLI_T4X0E8M8D1
> > +             THEAD_VSB_V_V0T0
> > +             "addi           t0, t0, 128\n\t"
> > +             THEAD_VSB_V_V8T0
> > +             "addi           t0, t0, 128\n\t"
> > +             THEAD_VSB_V_V16T0
> > +             "addi           t0, t0, 128\n\t"
> > +             THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
> > +             ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> > +             : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");
> >       riscv_v_disable();
> >  }
> >
> > @@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
> >       unsigned long vl;
> >
> >       riscv_v_enable();
> > -     asm volatile (
> > +     asm volatile (ALTERNATIVE(
> > +             "nop\n\t"
> >               ".option push\n\t"
> >               ".option arch, +v\n\t"
> >               "vsetvli        %0, x0, e8, m8, ta, ma\n\t"
> > @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
> >               "vle8.v         v16, (%1)\n\t"
> >               "add            %1, %1, %0\n\t"
> >               "vle8.v         v24, (%1)\n\t"
> > -             ".option pop\n\t"
> > -             : "=&r" (vl) : "r" (datap) : "memory");
> > +             ".option pop\n\t",
> > +             "mv             t0, %1\n\t"
> > +             THEAD_VSETVLI_T4X0E8M8D1
> > +             THEAD_VLB_V_V0T0
> > +             "addi           t0, t0, 128\n\t"
> > +             THEAD_VLB_V_V8T0
> > +             "addi           t0, t0, 128\n\t"
> > +             THEAD_VLB_V_V16T0
> > +             "addi           t0, t0, 128\n\t"
> > +             THEAD_VLB_V_V24T0, THEAD_VENDOR_ID,
> > +             ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
> > +             : "=&r" (vl) : "r" (datap) : "t0", "t4");
> >       __vstate_csr_restore(restore_from);
> >       riscv_v_disable();
> >  }
> > diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
> > index 74178fb71805..51726890a4d0 100644
> > --- a/arch/riscv/kernel/vector.c
> > +++ b/arch/riscv/kernel/vector.c
> > @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs)
> >       u32 insn = (u32)regs->badaddr;
> >
> >       /* Do not handle if V is not supported, or disabled */
> > -     if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V))
> > +     if (!has_vector())
> >               return false;

riscv_v_first_use_handler() will not be able to detect if a process is
running with PR_RISCV_V_VSTATE_CTRL_OFF here after applying this
change IIIUC. This is the case where we disable the availability of V
for a process but it still executes V instructions anyway.

> >
> >       /* If V has been enabled then it is not the first-use trap */
> > --
> > 2.39.2
> >
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
Rémi Denis-Courmont June 29, 2023, 4:06 p.m. UTC | #12
Hi,

Le perjantaina 23. kesäkuuta 2023, 2.13.05 EEST Heiko Stuebner a écrit :
> diff --git a/arch/riscv/include/asm/errata_list.h
> b/arch/riscv/include/asm/errata_list.h index fb1a810f3d8c..ab21fadbe9c6
> 100644
> --- a/arch/riscv/include/asm/errata_list.h
> +++ b/arch/riscv/include/asm/errata_list.h
> @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE(				
		\
> 
>  	: "=r" (__ovl) :						
\
>  	: "memory")
> 
> +#ifdef CONFIG_ERRATA_THEAD_VECTOR
> +
> +#define THEAD_C9XX_CSR_VXSAT			0x9
> +#define THEAD_C9XX_CSR_VXRM			0xa
> +
> +/*
> + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
> + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
> + * vsetvli	t4, x0, e8, m8, d1
> + */
> +#define THEAD_VSETVLI_T4X0E8M8D1	".long	0x00307ed7\n\t"

That is equivalent to, and (IMHO) much less legible than:
".insn   i OP_V, 7, t4, x0, 3"
Or even if you don't mind second-guessing RVV 1.0 assemblers:
"vsetvli t4, zero, e8, m8, tu, mu"

Either way, you don't need to hard-code X-register operands in assembler 
macros (though you do unfortunately need to hard-code V register operands if 
you use .insn).

> +
> +/*
> + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
> + * encoding as the standard vse8.v and vle8.v,

Not only in theory. vse8.v and vle8.v have only one possible encoding each 
(for given operands).

> compilers seem to optimize

Nit: By "compilers", do you mean "assemblers"? That's a bit misleading to me.

> + * the call resulting in a different encoding and then using a value for
> + * the "mop" field that is not part of vector-0.7.1

Uh, no? They use mew = 0b0 and mop = 0b00, which corresponds to mop = 0b000.

> + * So encode specific variants for vstate_save and _restore.
> + */
> +#define THEAD_VSB_V_V0T0		".long	0x02028027\n\t"

That's "vse8.v v0, (t0)", at least as assembled with binutils 2.40.50.20230625 
(from Debian unstable). I don't understand the rationale for hard-coding from 
the above comment. Maybe that's just me being an idiot, but if so, then the 
comment ought to be clarified.

(I do realise that vse8.v and vsb.v are not exactly equivalent in behaviour, 
but here, the concern should be the assembler, not the processor.)

> +#define THEAD_VSB_V_V8T0		".long	0x02028427\n\t"
> +#define THEAD_VSB_V_V16T0		".long	0x02028827\n\t"
> +#define THEAD_VSB_V_V24T0		".long	0x02028c27\n\t"
> +#define THEAD_VLB_V_V0T0		".long	0x012028007\n\t"

This has one nibble too many for a 32-bit value.

And why use sign-extended loads? Zero-extended loads would have the exact same 
encoding as vle8.v, and not need this dark magic, AFAICT.

> +#define THEAD_VLB_V_V8T0		".long	0x012028407\n\t"
> +#define THEAD_VLB_V_V16T0		".long	0x012028807\n\t"
> +#define THEAD_VLB_V_V24T0		".long	0x012028c07\n\t"
> +
> +#define ALT_SR_VS_VECTOR_1_0_SHIFT	9
> +#define ALT_SR_VS_THEAD_SHIFT		23
> +
> +#define ALT_SR_VS(_val, prot)					
	\
> +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3",				
\
> +		"li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID,		
\
> +		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)	\
> +		: "=r"(_val)					
	\
> +		: "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT),	\
> +		  "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT),		
\
> +		  "I"(ALT_SR_VS_VECTOR_1_0_SHIFT),			
\
> +		  "I"(ALT_SR_VS_THEAD_SHIFT))
> +#endif /* CONFIG_ERRATA_THEAD_VECTOR */
> +
>  #endif /* __ASSEMBLY__ */
> 
>  #endif
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata
index 0c8f4652cd82..b461312dd452 100644
--- a/arch/riscv/Kconfig.errata
+++ b/arch/riscv/Kconfig.errata
@@ -77,4 +77,17 @@  config ERRATA_THEAD_PMU
 
 	  If you don't know what to do here, say "Y".
 
+config ERRATA_THEAD_VECTOR
+	bool "Apply T-Head Vector errata"
+	depends on ERRATA_THEAD && RISCV_ISA_V
+	default y
+	help
+	  The T-Head C9xx cores implement an earlier version 0.7.1
+	  of the vector extensions.
+
+	  This will apply the necessary errata to handle the non-standard
+	  behaviour via when switch to and from vector mode for processes.
+
+	  If you don't know what to do here, say "Y".
+
 endmenu # "CPU errata selection"
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index c259dc925ec1..c41ec84bc8a5 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -15,6 +15,7 @@ 
 #include <asm/errata_list.h>
 #include <asm/hwprobe.h>
 #include <asm/patch.h>
+#include <asm/vector.h>
 #include <asm/vendorid_list.h>
 
 static bool errata_probe_pbmt(unsigned int stage,
@@ -66,6 +67,34 @@  static bool errata_probe_pmu(unsigned int stage,
 	return true;
 }
 
+static bool errata_probe_vector(unsigned int stage,
+				unsigned long arch_id, unsigned long impid)
+{
+	if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR))
+		return false;
+
+	/* target-c9xx cores report arch_id and impid as 0 */
+	if (arch_id != 0 || impid != 0)
+		return false;
+
+	if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) {
+		/*
+		 * Disable VECTOR to detect illegal usage of vector in kernel.
+		 * This is normally done in _start_kernel but with the
+		 * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's
+		 * vector-0.7.1 and the vector-1.0-bits are unused there.
+		 */
+		csr_clear(CSR_STATUS, SR_VS_THEAD);
+		return false;
+	}
+
+	/* let has_vector() return true and set the static vlen */
+	riscv_vector_supported();
+	riscv_v_vsize = 128 / 8 * 32;
+
+	return true;
+}
+
 static u32 thead_errata_probe(unsigned int stage,
 			      unsigned long archid, unsigned long impid)
 {
@@ -80,6 +109,9 @@  static u32 thead_errata_probe(unsigned int stage,
 	if (errata_probe_pmu(stage, archid, impid))
 		cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
 
+	if (errata_probe_vector(stage, archid, impid))
+		cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR);
+
 	return cpu_req_errata;
 }
 
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 2d79bca6ffe8..521b3b939e51 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -24,11 +24,25 @@ 
 #define SR_FS_CLEAN	_AC(0x00004000, UL)
 #define SR_FS_DIRTY	_AC(0x00006000, UL)
 
-#define SR_VS		_AC(0x00000600, UL) /* Vector Status */
-#define SR_VS_OFF	_AC(0x00000000, UL)
-#define SR_VS_INITIAL	_AC(0x00000200, UL)
-#define SR_VS_CLEAN	_AC(0x00000400, UL)
-#define SR_VS_DIRTY	_AC(0x00000600, UL)
+#define SR_VS_OFF		_AC(0x00000000, UL)
+
+#define SR_VS_1_0		_AC(0x00000600, UL) /* Vector Status */
+#define SR_VS_INITIAL_1_0	_AC(0x00000200, UL)
+#define SR_VS_CLEAN_1_0		_AC(0x00000400, UL)
+#define SR_VS_DIRTY_1_0		_AC(0x00000600, UL)
+
+#define SR_VS_THEAD		_AC(0x01800000, UL) /* Vector Status */
+#define SR_VS_INITIAL_THEAD	_AC(0x00800000, UL)
+#define SR_VS_CLEAN_THEAD	_AC(0x01000000, UL)
+#define SR_VS_DIRTY_THEAD	_AC(0x01800000, UL)
+
+/*
+ * Always default to vector-1.0 handling in assembly and let the broken
+ * implementations handle their case separately.
+ */
+#ifdef __ASSEMBLY__
+#define SR_VS			SR_VS_1_0
+#endif
 
 #define SR_XS		_AC(0x00018000, UL) /* Extension Status */
 #define SR_XS_OFF	_AC(0x00000000, UL)
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index fb1a810f3d8c..ab21fadbe9c6 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -21,7 +21,8 @@ 
 #define	ERRATA_THEAD_PBMT 0
 #define	ERRATA_THEAD_CMO 1
 #define	ERRATA_THEAD_PMU 2
-#define	ERRATA_THEAD_NUMBER 3
+#define	ERRATA_THEAD_VECTOR 3
+#define	ERRATA_THEAD_NUMBER 4
 #endif
 
 #ifdef __ASSEMBLY__
@@ -154,6 +155,48 @@  asm volatile(ALTERNATIVE(						\
 	: "=r" (__ovl) :						\
 	: "memory")
 
+#ifdef CONFIG_ERRATA_THEAD_VECTOR
+
+#define THEAD_C9XX_CSR_VXSAT			0x9
+#define THEAD_C9XX_CSR_VXRM			0xa
+
+/*
+ * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older
+ * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for
+ * vsetvli	t4, x0, e8, m8, d1
+ */
+#define THEAD_VSETVLI_T4X0E8M8D1	".long	0x00307ed7\n\t"
+
+/*
+ * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same
+ * encoding as the standard vse8.v and vle8.v, compilers seem to optimize
+ * the call resulting in a different encoding and then using a value for
+ * the "mop" field that is not part of vector-0.7.1
+ * So encode specific variants for vstate_save and _restore.
+ */
+#define THEAD_VSB_V_V0T0		".long	0x02028027\n\t"
+#define THEAD_VSB_V_V8T0		".long	0x02028427\n\t"
+#define THEAD_VSB_V_V16T0		".long	0x02028827\n\t"
+#define THEAD_VSB_V_V24T0		".long	0x02028c27\n\t"
+#define THEAD_VLB_V_V0T0		".long	0x012028007\n\t"
+#define THEAD_VLB_V_V8T0		".long	0x012028407\n\t"
+#define THEAD_VLB_V_V16T0		".long	0x012028807\n\t"
+#define THEAD_VLB_V_V24T0		".long	0x012028c07\n\t"
+
+#define ALT_SR_VS_VECTOR_1_0_SHIFT	9
+#define ALT_SR_VS_THEAD_SHIFT		23
+
+#define ALT_SR_VS(_val, prot)						\
+asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3",				\
+		"li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID,		\
+		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)	\
+		: "=r"(_val)						\
+		: "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT),	\
+		  "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT),		\
+		  "I"(ALT_SR_VS_VECTOR_1_0_SHIFT),			\
+		  "I"(ALT_SR_VS_THEAD_SHIFT))
+#endif /* CONFIG_ERRATA_THEAD_VECTOR */
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 315c96d2b4d0..fa47f60f81e3 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -18,6 +18,55 @@ 
 #include <asm/hwcap.h>
 #include <asm/csr.h>
 #include <asm/asm.h>
+#include <asm/errata_list.h>
+
+#ifdef CONFIG_ERRATA_THEAD_VECTOR
+
+static inline unsigned long riscv_sr_vs(void)
+{
+	u32 val;
+
+	ALT_SR_VS(val, SR_VS);
+	return val;
+}
+
+static inline unsigned long riscv_sr_vs_initial(void)
+{
+	u32 val;
+
+	ALT_SR_VS(val, SR_VS_INITIAL);
+	return val;
+}
+
+static inline unsigned long riscv_sr_vs_clean(void)
+{
+	u32 val;
+
+	ALT_SR_VS(val, SR_VS_CLEAN);
+	return val;
+}
+
+static inline unsigned long riscv_sr_vs_dirty(void)
+{
+	u32 val;
+
+	ALT_SR_VS(val, SR_VS_DIRTY);
+	return val;
+}
+
+#define SR_VS		riscv_sr_vs()
+#define SR_VS_INITIAL	riscv_sr_vs_initial()
+#define SR_VS_CLEAN	riscv_sr_vs_clean()
+#define SR_VS_DIRTY	riscv_sr_vs_dirty()
+
+#else /* CONFIG_ERRATA_THEAD_VECTOR */
+
+#define SR_VS		SR_VS_1_0
+#define SR_VS_INITIAL	SR_VS_INITIAL_1_0
+#define SR_VS_CLEAN	SR_VS_CLEAN_1_0
+#define SR_VS_DIRTY	SR_VS_DIRTY_1_0
+
+#endif /* CONFIG_ERRATA_THEAD_VECTOR */
 
 extern bool riscv_v_supported;
 void riscv_vector_supported(void);
@@ -63,26 +112,74 @@  static __always_inline void riscv_v_disable(void)
 
 static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
 {
-	asm volatile (
+	register u32 t1 asm("t1") = (SR_FS);
+
+	/*
+	 * CSR_VCSR is defined as
+	 * [2:1] - vxrm[1:0]
+	 * [0] - vxsat
+	 * The earlier vector spec implemented by T-Head uses separate
+	 * registers for the same bit-elements, so just combine those
+	 * into the existing output field.
+	 *
+	 * Additionally T-Head cores need FS to be enabled when accessing
+	 * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
+	 * Though the cores do not implement the VXRM and VXSAT fields in the
+	 * FCSR CSR that vector-0.7.1 specifies.
+	 */
+	asm volatile (ALTERNATIVE(
 		"csrr	%0, " __stringify(CSR_VSTART) "\n\t"
 		"csrr	%1, " __stringify(CSR_VTYPE) "\n\t"
 		"csrr	%2, " __stringify(CSR_VL) "\n\t"
 		"csrr	%3, " __stringify(CSR_VCSR) "\n\t"
+		__nops(5),
+		"csrs	sstatus, t1\n\t"
+		"csrr	%0, " __stringify(CSR_VSTART) "\n\t"
+		"csrr	%1, " __stringify(CSR_VTYPE) "\n\t"
+		"csrr	%2, " __stringify(CSR_VL) "\n\t"
+		"csrr	%3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t"
+		"slliw	%3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
+		"csrr	t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t"
+		"or	%3, %3, t4\n\t"
+		"csrc	sstatus, t1\n\t",
+		THEAD_VENDOR_ID,
+		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
 		: "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
-		  "=r" (dest->vcsr) : :);
+		  "=r" (dest->vcsr) : "r"(t1) : "t4");
 }
 
 static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
 {
-	asm volatile (
+	register u32 t1 asm("t1") = (SR_FS);
+
+	/*
+	 * Similar to __vstate_csr_save above, restore values for the
+	 * separate VXRM and VXSAT CSRs from the vcsr variable.
+	 */
+	asm volatile (ALTERNATIVE(
 		".option push\n\t"
 		".option arch, +v\n\t"
 		"vsetvl	 x0, %2, %1\n\t"
 		".option pop\n\t"
 		"csrw	" __stringify(CSR_VSTART) ", %0\n\t"
 		"csrw	" __stringify(CSR_VCSR) ", %3\n\t"
+		__nops(6),
+		"csrs	sstatus, t1\n\t"
+		".option push\n\t"
+		".option arch, +v\n\t"
+		"vsetvl	 x0, %2, %1\n\t"
+		".option pop\n\t"
+		"csrw	" __stringify(CSR_VSTART) ", %0\n\t"
+		"srliw	t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t"
+		"andi	t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t"
+		"csrw	" __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t"
+		"andi	%3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t"
+		"csrw	" __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t"
+		"csrc	sstatus, t1\n\t",
+		THEAD_VENDOR_ID,
+		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
 		: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
-		    "r" (src->vcsr) :);
+		    "r" (src->vcsr), "r"(t1) : "t4");
 }
 
 static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
@@ -92,7 +189,8 @@  static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
 
 	riscv_v_enable();
 	__vstate_csr_save(save_to);
-	asm volatile (
+	asm volatile (ALTERNATIVE(
+		"nop\n\t"
 		".option push\n\t"
 		".option arch, +v\n\t"
 		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
@@ -103,8 +201,18 @@  static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
 		"vse8.v		v16, (%1)\n\t"
 		"add		%1, %1, %0\n\t"
 		"vse8.v		v24, (%1)\n\t"
-		".option pop\n\t"
-		: "=&r" (vl) : "r" (datap) : "memory");
+		".option pop\n\t",
+		"mv		t0, %1\n\t"
+		THEAD_VSETVLI_T4X0E8M8D1
+		THEAD_VSB_V_V0T0
+		"addi		t0, t0, 128\n\t"
+		THEAD_VSB_V_V8T0
+		"addi		t0, t0, 128\n\t"
+		THEAD_VSB_V_V16T0
+		"addi		t0, t0, 128\n\t"
+		THEAD_VSB_V_V24T0, THEAD_VENDOR_ID,
+		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
+		: "=&r" (vl) : "r" (datap) : "t0", "t4", "memory");
 	riscv_v_disable();
 }
 
@@ -114,7 +222,8 @@  static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
 	unsigned long vl;
 
 	riscv_v_enable();
-	asm volatile (
+	asm volatile (ALTERNATIVE(
+		"nop\n\t"
 		".option push\n\t"
 		".option arch, +v\n\t"
 		"vsetvli	%0, x0, e8, m8, ta, ma\n\t"
@@ -125,8 +234,18 @@  static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
 		"vle8.v		v16, (%1)\n\t"
 		"add		%1, %1, %0\n\t"
 		"vle8.v		v24, (%1)\n\t"
-		".option pop\n\t"
-		: "=&r" (vl) : "r" (datap) : "memory");
+		".option pop\n\t",
+		"mv		t0, %1\n\t"
+		THEAD_VSETVLI_T4X0E8M8D1
+		THEAD_VLB_V_V0T0
+		"addi		t0, t0, 128\n\t"
+		THEAD_VLB_V_V8T0
+		"addi		t0, t0, 128\n\t"
+		THEAD_VLB_V_V16T0
+		"addi		t0, t0, 128\n\t"
+		THEAD_VLB_V_V24T0, THEAD_VENDOR_ID,
+		ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR)
+		: "=&r" (vl) : "r" (datap) : "t0", "t4");
 	__vstate_csr_restore(restore_from);
 	riscv_v_disable();
 }
diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c
index 74178fb71805..51726890a4d0 100644
--- a/arch/riscv/kernel/vector.c
+++ b/arch/riscv/kernel/vector.c
@@ -140,7 +140,7 @@  bool riscv_v_first_use_handler(struct pt_regs *regs)
 	u32 insn = (u32)regs->badaddr;
 
 	/* Do not handle if V is not supported, or disabled */
-	if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V))
+	if (!has_vector())
 		return false;
 
 	/* If V has been enabled then it is not the first-use trap */