Message ID | 20230622231305.631331-4-heiko@sntech.de (mailing list archive) |
---|---|
State | Changes Requested |
Headers | show |
Series | RISC-V: T-Head vector handling | expand |
Context | Check | Description |
---|---|---|
conchuod/cover_letter | success | Series has a cover letter |
conchuod/tree_selection | success | Guessed tree name to be for-next at HEAD 4681dacadeef |
conchuod/fixes_present | success | Fixes tag not required for -next series |
conchuod/maintainers_pattern | success | MAINTAINERS pattern errors before the patch: 6 and now 6 |
conchuod/verify_signedoff | success | Signed-off-by tag matches author and committer |
conchuod/kdoc | success | Errors and warnings before: 0 this patch: 0 |
conchuod/build_rv64_clang_allmodconfig | fail | Failed to build the tree with this patch. |
conchuod/module_param | success | Was 0 now: 0 |
conchuod/build_rv64_gcc_allmodconfig | success | Errors and warnings before: 16720 this patch: 16720 |
conchuod/build_rv32_defconfig | fail | Build failed |
conchuod/dtb_warn_rv64 | success | Errors and warnings before: 20 this patch: 20 |
conchuod/header_inline | success | No static functions without inline keyword in header files |
conchuod/checkpatch | warning | CHECK: Lines should not end with a '(' |
conchuod/build_rv64_nommu_k210_defconfig | fail | Build failed |
conchuod/verify_fixes | success | No Fixes tag |
conchuod/build_rv64_nommu_virt_defconfig | fail | Build failed |
Hi Heiko, kernel test robot noticed the following build errors: [auto build test ERROR on next-20230622] [cannot apply to linus/master v6.4-rc7 v6.4-rc6 v6.4-rc5 v6.4-rc7] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Heiko-Stuebner/RISC-V-move-vector-available-status-into-a-dedicated-variable/20230623-081314 base: next-20230622 patch link: https://lore.kernel.org/r/20230622231305.631331-4-heiko%40sntech.de patch subject: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling config: riscv-randconfig-r042-20230622 (https://download.01.org/0day-ci/archive/20230623/202306231142.j8XLzSQL-lkp@intel.com/config) compiler: riscv64-linux-gcc (GCC) 12.3.0 reproduce: (https://download.01.org/0day-ci/archive/20230623/202306231142.j8XLzSQL-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202306231142.j8XLzSQL-lkp@intel.com/ All errors (new ones prefixed by >>): In file included from arch/riscv/include/asm/alternative.h:9, from arch/riscv/include/asm/errata_list.h:8, from arch/riscv/include/asm/tlbflush.h:12, from arch/riscv/include/asm/pgtable.h:112, from arch/riscv/include/asm/uaccess.h:12, from include/linux/uaccess.h:11, from include/linux/sched/task.h:11, from include/linux/sched/signal.h:9, from include/linux/rcuwait.h:6, from include/linux/percpu-rwsem.h:7, from include/linux/fs.h:33, from include/linux/compat.h:17, from arch/riscv/include/asm/elf.h:12, from include/linux/elf.h:6, from include/linux/module.h:19, from include/linux/device/driver.h:21, from include/linux/device.h:32, from include/linux/energy_model.h:5, from kernel/sched/fair.c:23: arch/riscv/include/asm/vector.h: In function '__riscv_v_vstate_save': >> arch/riscv/include/asm/vector.h:206:17: error: expected ':' or ')' before 'THEAD_VSETVLI_T4X0E8M8D1' 206 | THEAD_VSETVLI_T4X0E8M8D1 | ^~~~~~~~~~~~~~~~~~~~~~~~ arch/riscv/include/asm/alternative-macros.h:78:9: note: in definition of macro 'ALT_NEW_CONTENT' 78 | new_c "\n" \ | ^~~~~ arch/riscv/include/asm/alternative-macros.h:104:9: note: in expansion of macro '__ALTERNATIVE_CFG' 104 | __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k)) | ^~~~~~~~~~~~~~~~~ arch/riscv/include/asm/alternative-macros.h:152:9: note: in expansion of macro '_ALTERNATIVE_CFG' 152 | _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k) | ^~~~~~~~~~~~~~~~ arch/riscv/include/asm/vector.h:192:23: note: in expansion of macro 'ALTERNATIVE' 192 | asm volatile (ALTERNATIVE( | ^~~~~~~~~~~ In file included from arch/riscv/include/asm/switch_to.h:11, from kernel/sched/fair.c:51: arch/riscv/include/asm/vector.h:188:23: warning: unused variable 'vl' 
[-Wunused-variable] 188 | unsigned long vl; | ^~ arch/riscv/include/asm/vector.h: In function '__riscv_v_vstate_restore': arch/riscv/include/asm/vector.h:239:17: error: expected ':' or ')' before 'THEAD_VSETVLI_T4X0E8M8D1' 239 | THEAD_VSETVLI_T4X0E8M8D1 | ^~~~~~~~~~~~~~~~~~~~~~~~ arch/riscv/include/asm/alternative-macros.h:78:9: note: in definition of macro 'ALT_NEW_CONTENT' 78 | new_c "\n" \ | ^~~~~ arch/riscv/include/asm/alternative-macros.h:104:9: note: in expansion of macro '__ALTERNATIVE_CFG' 104 | __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k)) | ^~~~~~~~~~~~~~~~~ arch/riscv/include/asm/alternative-macros.h:152:9: note: in expansion of macro '_ALTERNATIVE_CFG' 152 | _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k) | ^~~~~~~~~~~~~~~~ arch/riscv/include/asm/vector.h:225:23: note: in expansion of macro 'ALTERNATIVE' 225 | asm volatile (ALTERNATIVE( | ^~~~~~~~~~~ arch/riscv/include/asm/vector.h:222:23: warning: unused variable 'vl' [-Wunused-variable] 222 | unsigned long vl; | ^~ kernel/sched/fair.c: At top level: kernel/sched/fair.c:688:5: warning: no previous prototype for 'sched_update_scaling' [-Wmissing-prototypes] 688 | int sched_update_scaling(void) | ^~~~~~~~~~~~~~~~~~~~ -- In file included from arch/riscv/include/asm/alternative.h:9, from arch/riscv/include/asm/errata_list.h:8, from arch/riscv/include/asm/tlbflush.h:12, from arch/riscv/include/asm/pgtable.h:112, from arch/riscv/include/asm/uaccess.h:12, from include/linux/uaccess.h:11, from include/linux/sched/task.h:11, from include/linux/sched/signal.h:9, from include/linux/sched/cputime.h:5, from kernel/sched/build_utility.c:13: arch/riscv/include/asm/vector.h: In function '__riscv_v_vstate_save': >> arch/riscv/include/asm/vector.h:206:17: error: expected ':' or ')' before 'THEAD_VSETVLI_T4X0E8M8D1' 206 | THEAD_VSETVLI_T4X0E8M8D1 | ^~~~~~~~~~~~~~~~~~~~~~~~ arch/riscv/include/asm/alternative-macros.h:78:9: note: in definition of macro 
'ALT_NEW_CONTENT' 78 | new_c "\n" \ | ^~~~~ arch/riscv/include/asm/alternative-macros.h:104:9: note: in expansion of macro '__ALTERNATIVE_CFG' 104 | __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k)) | ^~~~~~~~~~~~~~~~~ arch/riscv/include/asm/alternative-macros.h:152:9: note: in expansion of macro '_ALTERNATIVE_CFG' 152 | _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k) | ^~~~~~~~~~~~~~~~ arch/riscv/include/asm/vector.h:192:23: note: in expansion of macro 'ALTERNATIVE' 192 | asm volatile (ALTERNATIVE( | ^~~~~~~~~~~ In file included from arch/riscv/include/asm/switch_to.h:11, from kernel/sched/build_utility.c:51: arch/riscv/include/asm/vector.h:188:23: warning: unused variable 'vl' [-Wunused-variable] 188 | unsigned long vl; | ^~ arch/riscv/include/asm/vector.h: In function '__riscv_v_vstate_restore': arch/riscv/include/asm/vector.h:239:17: error: expected ':' or ')' before 'THEAD_VSETVLI_T4X0E8M8D1' 239 | THEAD_VSETVLI_T4X0E8M8D1 | ^~~~~~~~~~~~~~~~~~~~~~~~ arch/riscv/include/asm/alternative-macros.h:78:9: note: in definition of macro 'ALT_NEW_CONTENT' 78 | new_c "\n" \ | ^~~~~ arch/riscv/include/asm/alternative-macros.h:104:9: note: in expansion of macro '__ALTERNATIVE_CFG' 104 | __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k)) | ^~~~~~~~~~~~~~~~~ arch/riscv/include/asm/alternative-macros.h:152:9: note: in expansion of macro '_ALTERNATIVE_CFG' 152 | _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k) | ^~~~~~~~~~~~~~~~ arch/riscv/include/asm/vector.h:225:23: note: in expansion of macro 'ALTERNATIVE' 225 | asm volatile (ALTERNATIVE( | ^~~~~~~~~~~ arch/riscv/include/asm/vector.h:222:23: warning: unused variable 'vl' [-Wunused-variable] 222 | unsigned long vl; | ^~ vim +206 arch/riscv/include/asm/vector.h 184 185 static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, 186 void *datap) 187 { 188 unsigned long vl; 189 190 riscv_v_enable(); 191 
__vstate_csr_save(save_to); 192 asm volatile (ALTERNATIVE( 193 "nop\n\t" 194 ".option push\n\t" 195 ".option arch, +v\n\t" 196 "vsetvli %0, x0, e8, m8, ta, ma\n\t" 197 "vse8.v v0, (%1)\n\t" 198 "add %1, %1, %0\n\t" 199 "vse8.v v8, (%1)\n\t" 200 "add %1, %1, %0\n\t" 201 "vse8.v v16, (%1)\n\t" 202 "add %1, %1, %0\n\t" 203 "vse8.v v24, (%1)\n\t" 204 ".option pop\n\t", 205 "mv t0, %1\n\t" > 206 THEAD_VSETVLI_T4X0E8M8D1 207 THEAD_VSB_V_V0T0 208 "addi t0, t0, 128\n\t" 209 THEAD_VSB_V_V8T0 210 "addi t0, t0, 128\n\t" 211 THEAD_VSB_V_V16T0 212 "addi t0, t0, 128\n\t" 213 THEAD_VSB_V_V24T0, THEAD_VENDOR_ID, 214 ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) 215 : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory"); 216 riscv_v_disable(); 217 } 218
Hey Heiko, On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote: > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > T-Head C9xx cores implement an older version (0.7.1) of the vector > specification. > > Relevant changes concerning the kernel are: > - different placement of the SR_VS bit for the vector unit status > - different encoding of the vsetvli instruction > - different instructions for loads and stores > > And a fixed VLEN of 128. > > The in-kernel access to vector instances is limited to the save and > restore of process states so the above mentioned areas can simply be > handled via the alternatives framework, similar to other T-Head specific > issues. > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > --- > arch/riscv/Kconfig.errata | 13 +++ > arch/riscv/errata/thead/errata.c | 32 ++++++ > arch/riscv/include/asm/csr.h | 24 ++++- > arch/riscv/include/asm/errata_list.h | 45 ++++++++- > arch/riscv/include/asm/vector.h | 139 +++++++++++++++++++++++++-- > arch/riscv/kernel/vector.c | 2 +- > 6 files changed, 238 insertions(+), 17 deletions(-) > > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata > index 0c8f4652cd82..b461312dd452 100644 > --- a/arch/riscv/Kconfig.errata > +++ b/arch/riscv/Kconfig.errata > @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU > > If you don't know what to do here, say "Y". > > +config ERRATA_THEAD_VECTOR > + bool "Apply T-Head Vector errata" > + depends on ERRATA_THEAD && RISCV_ISA_V > + default y > + help > + The T-Head C9xx cores implement an earlier version 0.7.1 > + of the vector extensions. > + > + This will apply the necessary errata to handle the non-standard > + behaviour via when switch to and from vector mode for processes. > + > + If you don't know what to do here, say "Y". 
> + > endmenu # "CPU errata selection" > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c > index c259dc925ec1..c41ec84bc8a5 100644 > --- a/arch/riscv/errata/thead/errata.c > +++ b/arch/riscv/errata/thead/errata.c > @@ -15,6 +15,7 @@ > #include <asm/errata_list.h> > #include <asm/hwprobe.h> > #include <asm/patch.h> > +#include <asm/vector.h> > #include <asm/vendorid_list.h> > > static bool errata_probe_pbmt(unsigned int stage, > @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage, > return true; > } > > +static bool errata_probe_vector(unsigned int stage, > + unsigned long arch_id, unsigned long impid) > +{ > + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR)) > + return false; > + > + /* target-c9xx cores report arch_id and impid as 0 */ > + if (arch_id != 0 || impid != 0) > + return false; > + > + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) { > + /* > + * Disable VECTOR to detect illegal usage of vector in kernel. > + * This is normally done in _start_kernel but with the > + * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's > + * vector-0.7.1 and the vector-1.0-bits are unused there. > + */ > + csr_clear(CSR_STATUS, SR_VS_THEAD); > + return false; > + } > + > + /* let has_vector() return true and set the static vlen */ Hmm, I was wondering about how you were going to communicate this to userspace, since you're not going to be setting "v" in your DT, so there'll be nothing in /proc/cpuinfo indicating it. (I am assuming that this is your intention, as you'd not need to drop the alternative-based stuff from has_vector() if it wasn't) I don't think you can do this, as things stand, because of how hwprobe operates: static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, const struct cpumask *cpus) { ... if (has_vector()) pair->value |= RISCV_HWPROBE_IMA_V; ... } * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by version 1.0 of the RISC-V Vector extension manual. 
You'll need to change hwprobe to use has_vector() && riscv_has_extension_unlikely(v), or similar, as the condition for reporting. You'll also need some other way to communicate to userspace that T-Head's vector stuff is supported, no? I'm also _really_ unconvinced that turning on extensions that were not indicated in the DT or via ACPI is something we should be doing. Have I missed something here that'd make that assessment inaccurate? Cheers, Conor. FWIW I am currently working on kernel-side support for the new extension properties that I have posted dt-binding patches for. I'll go post it once Palmer has merged his current set of patches in his staging repo into for-next, as I've got a lot of deps on riscv,isa parser changes. I'm really hoping that it provides an easier way to go off probing for vendor specific stuff for DT-based systems, since it will no longer require complex probing, just an of_property_match_string() for each possible cpu and we could very well provide a vendor hook during that process. Clearly though, that stuff is not yet merged as it has not even been posted yet. 
Current WIP of that is here: https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/log/?h=riscv-extensions-strings-on-palmer > + riscv_vector_supported(); > + riscv_v_vsize = 128 / 8 * 32; > + > + return true; > +} > + > static u32 thead_errata_probe(unsigned int stage, > unsigned long archid, unsigned long impid) > { > @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage, > if (errata_probe_pmu(stage, archid, impid)) > cpu_req_errata |= BIT(ERRATA_THEAD_PMU); > > + if (errata_probe_vector(stage, archid, impid)) > + cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR); > + > return cpu_req_errata; > } > > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h > index 2d79bca6ffe8..521b3b939e51 100644 > --- a/arch/riscv/include/asm/csr.h > +++ b/arch/riscv/include/asm/csr.h > @@ -24,11 +24,25 @@ > #define SR_FS_CLEAN _AC(0x00004000, UL) > #define SR_FS_DIRTY _AC(0x00006000, UL) > > -#define SR_VS _AC(0x00000600, UL) /* Vector Status */ > -#define SR_VS_OFF _AC(0x00000000, UL) > -#define SR_VS_INITIAL _AC(0x00000200, UL) > -#define SR_VS_CLEAN _AC(0x00000400, UL) > -#define SR_VS_DIRTY _AC(0x00000600, UL) > +#define SR_VS_OFF _AC(0x00000000, UL) > + > +#define SR_VS_1_0 _AC(0x00000600, UL) /* Vector Status */ > +#define SR_VS_INITIAL_1_0 _AC(0x00000200, UL) > +#define SR_VS_CLEAN_1_0 _AC(0x00000400, UL) > +#define SR_VS_DIRTY_1_0 _AC(0x00000600, UL) > + > +#define SR_VS_THEAD _AC(0x01800000, UL) /* Vector Status */ > +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL) > +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL) > +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL) > + > +/* > + * Always default to vector-1.0 handling in assembly and let the broken > + * implementations handle their case separately. 
> + */ > +#ifdef __ASSEMBLY__ > +#define SR_VS SR_VS_1_0 > +#endif > > #define SR_XS _AC(0x00018000, UL) /* Extension Status */ > #define SR_XS_OFF _AC(0x00000000, UL) > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h > index fb1a810f3d8c..ab21fadbe9c6 100644 > --- a/arch/riscv/include/asm/errata_list.h > +++ b/arch/riscv/include/asm/errata_list.h > @@ -21,7 +21,8 @@ > #define ERRATA_THEAD_PBMT 0 > #define ERRATA_THEAD_CMO 1 > #define ERRATA_THEAD_PMU 2 > -#define ERRATA_THEAD_NUMBER 3 > +#define ERRATA_THEAD_VECTOR 3 > +#define ERRATA_THEAD_NUMBER 4 > #endif > > #ifdef __ASSEMBLY__ > @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE( \ > : "=r" (__ovl) : \ > : "memory") > > +#ifdef CONFIG_ERRATA_THEAD_VECTOR > + > +#define THEAD_C9XX_CSR_VXSAT 0x9 > +#define THEAD_C9XX_CSR_VXRM 0xa > + > +/* > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older > + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for > + * vsetvli t4, x0, e8, m8, d1 > + */ > +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t" > + > +/* > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same > + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize > + * the call resulting in a different encoding and then using a value for > + * the "mop" field that is not part of vector-0.7.1 > + * So encode specific variants for vstate_save and _restore. 
> + */ > +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t" > +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t" > +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t" > +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t" > +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t" > +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t" > +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t" > +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t" > + > +#define ALT_SR_VS_VECTOR_1_0_SHIFT 9 > +#define ALT_SR_VS_THEAD_SHIFT 23 > + > +#define ALT_SR_VS(_val, prot) \ > +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3", \ > + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \ > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \ > + : "=r"(_val) \ > + : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT), \ > + "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT), \ > + "I"(ALT_SR_VS_VECTOR_1_0_SHIFT), \ > + "I"(ALT_SR_VS_THEAD_SHIFT)) > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ > + > #endif /* __ASSEMBLY__ */ > > #endif > diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h > index 315c96d2b4d0..fa47f60f81e3 100644 > --- a/arch/riscv/include/asm/vector.h > +++ b/arch/riscv/include/asm/vector.h > @@ -18,6 +18,55 @@ > #include <asm/hwcap.h> > #include <asm/csr.h> > #include <asm/asm.h> > +#include <asm/errata_list.h> > + > +#ifdef CONFIG_ERRATA_THEAD_VECTOR > + > +static inline unsigned long riscv_sr_vs(void) > +{ > + u32 val; > + > + ALT_SR_VS(val, SR_VS); > + return val; > +} > + > +static inline unsigned long riscv_sr_vs_initial(void) > +{ > + u32 val; > + > + ALT_SR_VS(val, SR_VS_INITIAL); > + return val; > +} > + > +static inline unsigned long riscv_sr_vs_clean(void) > +{ > + u32 val; > + > + ALT_SR_VS(val, SR_VS_CLEAN); > + return val; > +} > + > +static inline unsigned long riscv_sr_vs_dirty(void) > +{ > + u32 val; > + > + ALT_SR_VS(val, SR_VS_DIRTY); > + return val; > +} > + > +#define SR_VS riscv_sr_vs() > +#define SR_VS_INITIAL riscv_sr_vs_initial() > +#define SR_VS_CLEAN 
riscv_sr_vs_clean() > +#define SR_VS_DIRTY riscv_sr_vs_dirty() > + > +#else /* CONFIG_ERRATA_THEAD_VECTOR */ > + > +#define SR_VS SR_VS_1_0 > +#define SR_VS_INITIAL SR_VS_INITIAL_1_0 > +#define SR_VS_CLEAN SR_VS_CLEAN_1_0 > +#define SR_VS_DIRTY SR_VS_DIRTY_1_0 > + > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ > > extern bool riscv_v_supported; > void riscv_vector_supported(void); > @@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void) > > static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) > { > - asm volatile ( > + register u32 t1 asm("t1") = (SR_FS); > + > + /* > + * CSR_VCSR is defined as > + * [2:1] - vxrm[1:0] > + * [0] - vxsat > + * The earlier vector spec implemented by T-Head uses separate > + * registers for the same bit-elements, so just combine those > + * into the existing output field. > + * > + * Additionally T-Head cores need FS to be enabled when accessing > + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions. > + * Though the cores do not implement the VXRM and VXSAT fields in the > + * FCSR CSR that vector-0.7.1 specifies. 
> + */ > + asm volatile (ALTERNATIVE( > "csrr %0, " __stringify(CSR_VSTART) "\n\t" > "csrr %1, " __stringify(CSR_VTYPE) "\n\t" > "csrr %2, " __stringify(CSR_VL) "\n\t" > "csrr %3, " __stringify(CSR_VCSR) "\n\t" > + __nops(5), > + "csrs sstatus, t1\n\t" > + "csrr %0, " __stringify(CSR_VSTART) "\n\t" > + "csrr %1, " __stringify(CSR_VTYPE) "\n\t" > + "csrr %2, " __stringify(CSR_VL) "\n\t" > + "csrr %3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t" > + "slliw %3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" > + "csrr t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t" > + "or %3, %3, t4\n\t" > + "csrc sstatus, t1\n\t", > + THEAD_VENDOR_ID, > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) > : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), > - "=r" (dest->vcsr) : :); > + "=r" (dest->vcsr) : "r"(t1) : "t4"); > } > > static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) > { > - asm volatile ( > + register u32 t1 asm("t1") = (SR_FS); > + > + /* > + * Similar to __vstate_csr_save above, restore values for the > + * separate VXRM and VXSAT CSRs from the vcsr variable. 
> + */ > + asm volatile (ALTERNATIVE( > ".option push\n\t" > ".option arch, +v\n\t" > "vsetvl x0, %2, %1\n\t" > ".option pop\n\t" > "csrw " __stringify(CSR_VSTART) ", %0\n\t" > "csrw " __stringify(CSR_VCSR) ", %3\n\t" > + __nops(6), > + "csrs sstatus, t1\n\t" > + ".option push\n\t" > + ".option arch, +v\n\t" > + "vsetvl x0, %2, %1\n\t" > + ".option pop\n\t" > + "csrw " __stringify(CSR_VSTART) ", %0\n\t" > + "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" > + "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t" > + "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t" > + "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t" > + "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t" > + "csrc sstatus, t1\n\t", > + THEAD_VENDOR_ID, > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) > : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl), > - "r" (src->vcsr) :); > + "r" (src->vcsr), "r"(t1) : "t4"); > } > > static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, > @@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, > > riscv_v_enable(); > __vstate_csr_save(save_to); > - asm volatile ( > + asm volatile (ALTERNATIVE( > + "nop\n\t" > ".option push\n\t" > ".option arch, +v\n\t" > "vsetvli %0, x0, e8, m8, ta, ma\n\t" > @@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, > "vse8.v v16, (%1)\n\t" > "add %1, %1, %0\n\t" > "vse8.v v24, (%1)\n\t" > - ".option pop\n\t" > - : "=&r" (vl) : "r" (datap) : "memory"); > + ".option pop\n\t", > + "mv t0, %1\n\t" > + THEAD_VSETVLI_T4X0E8M8D1 > + THEAD_VSB_V_V0T0 > + "addi t0, t0, 128\n\t" > + THEAD_VSB_V_V8T0 > + "addi t0, t0, 128\n\t" > + THEAD_VSB_V_V16T0 > + "addi t0, t0, 128\n\t" > + THEAD_VSB_V_V24T0, THEAD_VENDOR_ID, > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) > + : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory"); > riscv_v_disable(); > } > > @@ -114,7 +222,8 @@ static inline void 
__riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ > unsigned long vl; > > riscv_v_enable(); > - asm volatile ( > + asm volatile (ALTERNATIVE( > + "nop\n\t" > ".option push\n\t" > ".option arch, +v\n\t" > "vsetvli %0, x0, e8, m8, ta, ma\n\t" > @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ > "vle8.v v16, (%1)\n\t" > "add %1, %1, %0\n\t" > "vle8.v v24, (%1)\n\t" > - ".option pop\n\t" > - : "=&r" (vl) : "r" (datap) : "memory"); > + ".option pop\n\t", > + "mv t0, %1\n\t" > + THEAD_VSETVLI_T4X0E8M8D1 > + THEAD_VLB_V_V0T0 > + "addi t0, t0, 128\n\t" > + THEAD_VLB_V_V8T0 > + "addi t0, t0, 128\n\t" > + THEAD_VLB_V_V16T0 > + "addi t0, t0, 128\n\t" > + THEAD_VLB_V_V24T0, THEAD_VENDOR_ID, > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) > + : "=&r" (vl) : "r" (datap) : "t0", "t4"); > __vstate_csr_restore(restore_from); > riscv_v_disable(); > } > diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c > index 74178fb71805..51726890a4d0 100644 > --- a/arch/riscv/kernel/vector.c > +++ b/arch/riscv/kernel/vector.c > @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) > u32 insn = (u32)regs->badaddr; > > /* Do not handle if V is not supported, or disabled */ > - if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V)) > + if (!has_vector()) > return false; > > /* If V has been enabled then it is not the first-use trap */ > -- > 2.39.2 >
Hey Conor, Am Freitag, 23. Juni 2023, 11:49:41 CEST schrieb Conor Dooley: > On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote: > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > T-Head C9xx cores implement an older version (0.7.1) of the vector > > specification. > > > > Relevant changes concerning the kernel are: > > - different placement of the SR_VS bit for the vector unit status > > - different encoding of the vsetvli instruction > > - different instructions for loads and stores > > > > And a fixed VLEN of 128. > > > > The in-kernel access to vector instances is limited to the save and > > restore of process states so the above mentioned areas can simply be > > handled via the alternatives framework, similar to other T-Head specific > > issues. > > > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > > --- > > arch/riscv/Kconfig.errata | 13 +++ > > arch/riscv/errata/thead/errata.c | 32 ++++++ > > arch/riscv/include/asm/csr.h | 24 ++++- > > arch/riscv/include/asm/errata_list.h | 45 ++++++++- > > arch/riscv/include/asm/vector.h | 139 +++++++++++++++++++++++++-- > > arch/riscv/kernel/vector.c | 2 +- > > 6 files changed, 238 insertions(+), 17 deletions(-) > > > > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata > > index 0c8f4652cd82..b461312dd452 100644 > > --- a/arch/riscv/Kconfig.errata > > +++ b/arch/riscv/Kconfig.errata > > @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU > > > > If you don't know what to do here, say "Y". > > > > +config ERRATA_THEAD_VECTOR > > + bool "Apply T-Head Vector errata" > > + depends on ERRATA_THEAD && RISCV_ISA_V > > + default y > > + help > > + The T-Head C9xx cores implement an earlier version 0.7.1 > > + of the vector extensions. > > + > > + This will apply the necessary errata to handle the non-standard > > + behaviour via when switch to and from vector mode for processes. > > + > > + If you don't know what to do here, say "Y". 
> > + > > endmenu # "CPU errata selection" > > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c > > index c259dc925ec1..c41ec84bc8a5 100644 > > --- a/arch/riscv/errata/thead/errata.c > > +++ b/arch/riscv/errata/thead/errata.c > > @@ -15,6 +15,7 @@ > > #include <asm/errata_list.h> > > #include <asm/hwprobe.h> > > #include <asm/patch.h> > > +#include <asm/vector.h> > > #include <asm/vendorid_list.h> > > > > static bool errata_probe_pbmt(unsigned int stage, > > @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage, > > return true; > > } > > > > +static bool errata_probe_vector(unsigned int stage, > > + unsigned long arch_id, unsigned long impid) > > +{ > > + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR)) > > + return false; > > + > > + /* target-c9xx cores report arch_id and impid as 0 */ > > + if (arch_id != 0 || impid != 0) > > + return false; > > + > > + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) { > > + /* > > + * Disable VECTOR to detect illegal usage of vector in kernel. > > + * This is normally done in _start_kernel but with the > > + * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's > > + * vector-0.7.1 and the vector-1.0-bits are unused there. > > + */ > > + csr_clear(CSR_STATUS, SR_VS_THEAD); > > + return false; > > + } > > + > > + /* let has_vector() return true and set the static vlen */ > > Hmm, I was wondering about how you were going to communicate this to > userspace, since you're not going to be setting "v" in your DT, so > there'll be nothing in /proc/cpuinfo indicating it. (I am assuming that > this is your intention, as you'd not need to drop the alternative-based > stuff from has_vector() if it wasn't) I'm working on the assumption that the t-head vector is way too different from the official vector, that a userspace will definitely need to handle this in some way specially and we can't claim to use a "real" vector spec. 
So in this first step, my goal is to simply allow userspace programs compiled to use the t-head vector instructions (i.e. 0.7.1 presumably) to not hang the kernel and do all the necessary bringup and teardown needed for executing those vector instructions ;-) . > I don't think you can do this, as things stand, because of how hwprobe > operates: > > static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, > const struct cpumask *cpus) > { > ... > > if (has_vector()) > pair->value |= RISCV_HWPROBE_IMA_V; > > ... > } > > * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by > version 1.0 of the RISC-V Vector extension manual. > > You'll need to change hwprobe to use has_vector() && > riscv_has_extension_unlikely(v), or similar, as the condition for > reporting. ah right, and yes I need to adapt hwprobe as you wrote. > You'll also need some other way to communicate to userspace > that T-Head's vector stuff is supported, no? As I said above, baby-steps - not-dying first ;-) . > I'm also _really_ unconvinced that turning on extensions that were not > indicated in the DT or via ACPI is something we should be doing. Have I > missed something here that'd make that assessment inaccurate? Hmm, DT (and ACPI) is a (static) hardware-description, not a configuration space (sermon of DT maintainers for years), so the ISA string in DT will simply describe _all_ extensions the hardware supports. So there _should_ never be a case of "I want to disable vectors and will remove the letter from the ISA string". For T-Head we _know_ from vendor-id and friends that the core supports this special brand of vectors. We're also turning on the t-head equivalent of svpbmt and zicbom with probably the same reasoning. > FWIW I am currently working on kernel-side support for the new > extension properties that I have posted dt-binding patches for. 
> I'll go post it once Palmer has merged his current set of patches in his > staging repo into for-next, as I've got a lot of deps on riscv,isa > parser changes. > I'm really hoping that it provides an easier way to go off probing for > vendor specific stuff for DT-based systems, since it will no longer > require complex probing, just an of_property_match_string() for each > possible cpu and we could very well provide a vendor hook during that > process. > Clearly though, that stuff is not yet merged as it has not even been > posted yet. And with the comments I received, T-Head vector also is not ready for prime-time yet, so we're all good :-) Heiko > Current WIP of that is here: > https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/log/?h=riscv-extensions-strings-on-palmer > > > + riscv_vector_supported(); > > + riscv_v_vsize = 128 / 8 * 32; > > + > > + return true; > > +} > > + > > static u32 thead_errata_probe(unsigned int stage, > > unsigned long archid, unsigned long impid) > > { > > @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage, > > if (errata_probe_pmu(stage, archid, impid)) > > cpu_req_errata |= BIT(ERRATA_THEAD_PMU); > > > > + if (errata_probe_vector(stage, archid, impid)) > > + cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR); > > + > > return cpu_req_errata; > > } > > > > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h > > index 2d79bca6ffe8..521b3b939e51 100644 > > --- a/arch/riscv/include/asm/csr.h > > +++ b/arch/riscv/include/asm/csr.h > > @@ -24,11 +24,25 @@ > > #define SR_FS_CLEAN _AC(0x00004000, UL) > > #define SR_FS_DIRTY _AC(0x00006000, UL) > > > > -#define SR_VS _AC(0x00000600, UL) /* Vector Status */ > > -#define SR_VS_OFF _AC(0x00000000, UL) > > -#define SR_VS_INITIAL _AC(0x00000200, UL) > > -#define SR_VS_CLEAN _AC(0x00000400, UL) > > -#define SR_VS_DIRTY _AC(0x00000600, UL) > > +#define SR_VS_OFF _AC(0x00000000, UL) > > + > > +#define SR_VS_1_0 _AC(0x00000600, UL) /* Vector Status */ > > 
+#define SR_VS_INITIAL_1_0 _AC(0x00000200, UL) > > +#define SR_VS_CLEAN_1_0 _AC(0x00000400, UL) > > +#define SR_VS_DIRTY_1_0 _AC(0x00000600, UL) > > + > > +#define SR_VS_THEAD _AC(0x01800000, UL) /* Vector Status */ > > +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL) > > +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL) > > +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL) > > + > > +/* > > + * Always default to vector-1.0 handling in assembly and let the broken > > + * implementations handle their case separately. > > + */ > > +#ifdef __ASSEMBLY__ > > +#define SR_VS SR_VS_1_0 > > +#endif > > > > #define SR_XS _AC(0x00018000, UL) /* Extension Status */ > > #define SR_XS_OFF _AC(0x00000000, UL) > > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h > > index fb1a810f3d8c..ab21fadbe9c6 100644 > > --- a/arch/riscv/include/asm/errata_list.h > > +++ b/arch/riscv/include/asm/errata_list.h > > @@ -21,7 +21,8 @@ > > #define ERRATA_THEAD_PBMT 0 > > #define ERRATA_THEAD_CMO 1 > > #define ERRATA_THEAD_PMU 2 > > -#define ERRATA_THEAD_NUMBER 3 > > +#define ERRATA_THEAD_VECTOR 3 > > +#define ERRATA_THEAD_NUMBER 4 > > #endif > > > > #ifdef __ASSEMBLY__ > > @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE( \ > > : "=r" (__ovl) : \ > > : "memory") > > > > +#ifdef CONFIG_ERRATA_THEAD_VECTOR > > + > > +#define THEAD_C9XX_CSR_VXSAT 0x9 > > +#define THEAD_C9XX_CSR_VXRM 0xa > > + > > +/* > > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older > > + * encoding for vsetvli (ta, ma vs. 
d1), so provide an instruction for > > + * vsetvli t4, x0, e8, m8, d1 > > + */ > > +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t" > > + > > +/* > > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same > > + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize > > + * the call resulting in a different encoding and then using a value for > > + * the "mop" field that is not part of vector-0.7.1 > > + * So encode specific variants for vstate_save and _restore. > > + */ > > +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t" > > +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t" > > +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t" > > +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t" > > +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t" > > +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t" > > +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t" > > +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t" > > + > > +#define ALT_SR_VS_VECTOR_1_0_SHIFT 9 > > +#define ALT_SR_VS_THEAD_SHIFT 23 > > + > > +#define ALT_SR_VS(_val, prot) \ > > +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3", \ > > + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \ > > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \ > > + : "=r"(_val) \ > > + : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT), \ > > + "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT), \ > > + "I"(ALT_SR_VS_VECTOR_1_0_SHIFT), \ > > + "I"(ALT_SR_VS_THEAD_SHIFT)) > > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ > > + > > #endif /* __ASSEMBLY__ */ > > > > #endif > > diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h > > index 315c96d2b4d0..fa47f60f81e3 100644 > > --- a/arch/riscv/include/asm/vector.h > > +++ b/arch/riscv/include/asm/vector.h > > @@ -18,6 +18,55 @@ > > #include <asm/hwcap.h> > > #include <asm/csr.h> > > #include <asm/asm.h> > > +#include <asm/errata_list.h> > > + > > +#ifdef CONFIG_ERRATA_THEAD_VECTOR > > + > > +static inline unsigned long riscv_sr_vs(void) > > 
+{ > > + u32 val; > > + > > + ALT_SR_VS(val, SR_VS); > > + return val; > > +} > > + > > +static inline unsigned long riscv_sr_vs_initial(void) > > +{ > > + u32 val; > > + > > + ALT_SR_VS(val, SR_VS_INITIAL); > > + return val; > > +} > > + > > +static inline unsigned long riscv_sr_vs_clean(void) > > +{ > > + u32 val; > > + > > + ALT_SR_VS(val, SR_VS_CLEAN); > > + return val; > > +} > > + > > +static inline unsigned long riscv_sr_vs_dirty(void) > > +{ > > + u32 val; > > + > > + ALT_SR_VS(val, SR_VS_DIRTY); > > + return val; > > +} > > + > > +#define SR_VS riscv_sr_vs() > > +#define SR_VS_INITIAL riscv_sr_vs_initial() > > +#define SR_VS_CLEAN riscv_sr_vs_clean() > > +#define SR_VS_DIRTY riscv_sr_vs_dirty() > > + > > +#else /* CONFIG_ERRATA_THEAD_VECTOR */ > > + > > +#define SR_VS SR_VS_1_0 > > +#define SR_VS_INITIAL SR_VS_INITIAL_1_0 > > +#define SR_VS_CLEAN SR_VS_CLEAN_1_0 > > +#define SR_VS_DIRTY SR_VS_DIRTY_1_0 > > + > > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ > > > > extern bool riscv_v_supported; > > void riscv_vector_supported(void); > > @@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void) > > > > static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) > > { > > - asm volatile ( > > + register u32 t1 asm("t1") = (SR_FS); > > + > > + /* > > + * CSR_VCSR is defined as > > + * [2:1] - vxrm[1:0] > > + * [0] - vxsat > > + * The earlier vector spec implemented by T-Head uses separate > > + * registers for the same bit-elements, so just combine those > > + * into the existing output field. > > + * > > + * Additionally T-Head cores need FS to be enabled when accessing > > + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions. > > + * Though the cores do not implement the VXRM and VXSAT fields in the > > + * FCSR CSR that vector-0.7.1 specifies. 
> > + */ > > + asm volatile (ALTERNATIVE( > > "csrr %0, " __stringify(CSR_VSTART) "\n\t" > > "csrr %1, " __stringify(CSR_VTYPE) "\n\t" > > "csrr %2, " __stringify(CSR_VL) "\n\t" > > "csrr %3, " __stringify(CSR_VCSR) "\n\t" > > + __nops(5), > > + "csrs sstatus, t1\n\t" > > + "csrr %0, " __stringify(CSR_VSTART) "\n\t" > > + "csrr %1, " __stringify(CSR_VTYPE) "\n\t" > > + "csrr %2, " __stringify(CSR_VL) "\n\t" > > + "csrr %3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t" > > + "slliw %3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" > > + "csrr t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t" > > + "or %3, %3, t4\n\t" > > + "csrc sstatus, t1\n\t", > > + THEAD_VENDOR_ID, > > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) > > : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), > > - "=r" (dest->vcsr) : :); > > + "=r" (dest->vcsr) : "r"(t1) : "t4"); > > } > > > > static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) > > { > > - asm volatile ( > > + register u32 t1 asm("t1") = (SR_FS); > > + > > + /* > > + * Similar to __vstate_csr_save above, restore values for the > > + * separate VXRM and VXSAT CSRs from the vcsr variable. 
> > + */ > > + asm volatile (ALTERNATIVE( > > ".option push\n\t" > > ".option arch, +v\n\t" > > "vsetvl x0, %2, %1\n\t" > > ".option pop\n\t" > > "csrw " __stringify(CSR_VSTART) ", %0\n\t" > > "csrw " __stringify(CSR_VCSR) ", %3\n\t" > > + __nops(6), > > + "csrs sstatus, t1\n\t" > > + ".option push\n\t" > > + ".option arch, +v\n\t" > > + "vsetvl x0, %2, %1\n\t" > > + ".option pop\n\t" > > + "csrw " __stringify(CSR_VSTART) ", %0\n\t" > > + "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" > > + "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t" > > + "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t" > > + "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t" > > + "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t" > > + "csrc sstatus, t1\n\t", > > + THEAD_VENDOR_ID, > > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) > > : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl), > > - "r" (src->vcsr) :); > > + "r" (src->vcsr), "r"(t1) : "t4"); > > } > > > > static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, > > @@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, > > > > riscv_v_enable(); > > __vstate_csr_save(save_to); > > - asm volatile ( > > + asm volatile (ALTERNATIVE( > > + "nop\n\t" > > ".option push\n\t" > > ".option arch, +v\n\t" > > "vsetvli %0, x0, e8, m8, ta, ma\n\t" > > @@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, > > "vse8.v v16, (%1)\n\t" > > "add %1, %1, %0\n\t" > > "vse8.v v24, (%1)\n\t" > > - ".option pop\n\t" > > - : "=&r" (vl) : "r" (datap) : "memory"); > > + ".option pop\n\t", > > + "mv t0, %1\n\t" > > + THEAD_VSETVLI_T4X0E8M8D1 > > + THEAD_VSB_V_V0T0 > > + "addi t0, t0, 128\n\t" > > + THEAD_VSB_V_V8T0 > > + "addi t0, t0, 128\n\t" > > + THEAD_VSB_V_V16T0 > > + "addi t0, t0, 128\n\t" > > + THEAD_VSB_V_V24T0, THEAD_VENDOR_ID, > > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) > > + : "=&r" (vl) : "r" (datap) : "t0", 
"t4", "memory"); > > riscv_v_disable(); > > } > > > > @@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ > > unsigned long vl; > > > > riscv_v_enable(); > > - asm volatile ( > > + asm volatile (ALTERNATIVE( > > + "nop\n\t" > > ".option push\n\t" > > ".option arch, +v\n\t" > > "vsetvli %0, x0, e8, m8, ta, ma\n\t" > > @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ > > "vle8.v v16, (%1)\n\t" > > "add %1, %1, %0\n\t" > > "vle8.v v24, (%1)\n\t" > > - ".option pop\n\t" > > - : "=&r" (vl) : "r" (datap) : "memory"); > > + ".option pop\n\t", > > + "mv t0, %1\n\t" > > + THEAD_VSETVLI_T4X0E8M8D1 > > + THEAD_VLB_V_V0T0 > > + "addi t0, t0, 128\n\t" > > + THEAD_VLB_V_V8T0 > > + "addi t0, t0, 128\n\t" > > + THEAD_VLB_V_V16T0 > > + "addi t0, t0, 128\n\t" > > + THEAD_VLB_V_V24T0, THEAD_VENDOR_ID, > > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) > > + : "=&r" (vl) : "r" (datap) : "t0", "t4"); > > __vstate_csr_restore(restore_from); > > riscv_v_disable(); > > } > > diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c > > index 74178fb71805..51726890a4d0 100644 > > --- a/arch/riscv/kernel/vector.c > > +++ b/arch/riscv/kernel/vector.c > > @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) > > u32 insn = (u32)regs->badaddr; > > > > /* Do not handle if V is not supported, or disabled */ > > - if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V)) > > + if (!has_vector()) > > return false; > > > > /* If V has been enabled then it is not the first-use trap */ >
On Fri, Jun 23, 2023 at 12:40:43PM +0200, Heiko Stübner wrote: > Am Freitag, 23. Juni 2023, 11:49:41 CEST schrieb Conor Dooley: > > On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote: > > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > I'm also _really_ unconvinced that turning on extensions that were not > > indicated in the DT or via ACPI is something we should be doing. Have I > > missed something here that'd make that assessment inaccurate? > > Hmm, DT (and ACPI) is a (static) hardware-description, not a configuration > space (sermon of DT maintainers for years), so the ISA string in DT will > simply describe _all_ extensions the hardware supports. So there _should_ > never be a case of "I want to disable vectors and will remove the letter > from the ISA string". I think I pointed it out previously, on the thread about using the isa string in hwcap that you proposed, but it was things like hypervisors that modify the DT that they pass to guests that I was talking about here, rather than an end-user. Obviously this doesn't apply to things that do not have hypervisor support, but if/when those do exist you'd be relying on them not having the empty arch/impl ids. > We're also turning on the t-head equivalent of svpbmt and zicbom with > probably the same reasoning. I'd argue that we should describe these things in whatever a non isa string DT property ends up looking like, even if we missed the boat on putting them in riscv,isa. Maybe this is a self-serving interpretation, but I see the svpbmt and zicbom equivalents somewhat differently. They're done under the hood, ostensibly to make the thing spec compliant (it still claims to be rv64gc). This one is "turn on a new, user-visible, feature", rather than "we implement a standard thing, but it is broken, so silently fix it up". I would probably feel differently about this aspect of things if there was no intention to actually communicate the presence of the extension to userspace. 
> For T-Head we _know_ from vendor-id and friends that the core supports > this special brand of vectors. If we _know_ on Foobar SoC that it supports xyz extension based on vendor_id etc, should we add detection for that too, using those as a basis? I really don't want to have a precedent for T-Head getting to use this method (will the same logic apply to their bitmanip stuff?), that is not going to be applied to other vendors. Hopefully that better explains where I am coming from, lmk if I am overlooking something that should be obvious. Cheers, Conor.
Hi Heiko, kernel test robot noticed the following build errors: [auto build test ERROR on next-20230622] [cannot apply to linus/master v6.4-rc7 v6.4-rc6 v6.4-rc5 v6.4-rc7] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Heiko-Stuebner/RISC-V-move-vector-available-status-into-a-dedicated-variable/20230623-081314 base: next-20230622 patch link: https://lore.kernel.org/r/20230622231305.631331-4-heiko%40sntech.de patch subject: [PATCH v2 3/3] RISC-V: add T-Head vector errata handling config: riscv-rv32_defconfig (https://download.01.org/0day-ci/archive/20230623/202306232111.5WpYab2n-lkp@intel.com/config) compiler: riscv32-linux-gcc (GCC) 12.3.0 reproduce: (https://download.01.org/0day-ci/archive/20230623/202306232111.5WpYab2n-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202306232111.5WpYab2n-lkp@intel.com/ All errors (new ones prefixed by >>): arch/riscv/include/asm/vector.h: Assembler messages: >> arch/riscv/include/asm/vector.h:162: Error: unrecognized opcode `slliw a4,a4,1' >> arch/riscv/include/asm/vector.h:194: Error: unrecognized opcode `srliw t4,a1,1' >> arch/riscv/include/asm/vector.h:169: Error: attempt to move .org backwards arch/riscv/include/asm/vector.h:203: Error: attempt to move .org backwards -- arch/riscv/include/asm/vector.h: Assembler messages: >> arch/riscv/include/asm/vector.h:162: Error: unrecognized opcode `slliw a4,a4,1' >> arch/riscv/include/asm/vector.h:169: Error: attempt to move .org backwards -- arch/riscv/include/asm/vector.h: Assembler messages: >> arch/riscv/include/asm/vector.h:194: Error: unrecognized opcode `srliw t4,a1,1' >> arch/riscv/include/asm/vector.h:162: Error: unrecognized opcode `slliw a4,a4,1' arch/riscv/include/asm/vector.h:203: Error: attempt to move .org backwards >> arch/riscv/include/asm/vector.h:169: Error: attempt to move .org backwards vim +162 arch/riscv/include/asm/vector.h 03c3fcd9941a17 Greentime Hu 2023-06-05 150 03c3fcd9941a17 Greentime Hu 2023-06-05 151 static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) 03c3fcd9941a17 Greentime Hu 2023-06-05 152 { 5255e253b722bb Heiko Stuebner 2023-06-23 153 register u32 t1 asm("t1") = (SR_FS); 5255e253b722bb Heiko Stuebner 2023-06-23 154 5255e253b722bb Heiko Stuebner 2023-06-23 155 /* 5255e253b722bb Heiko Stuebner 2023-06-23 156 * Similar to __vstate_csr_save above, restore values for the 5255e253b722bb Heiko Stuebner 2023-06-23 157 * separate VXRM and VXSAT CSRs from the vcsr variable. 
5255e253b722bb Heiko Stuebner 2023-06-23 158 */ 5255e253b722bb Heiko Stuebner 2023-06-23 159 asm volatile (ALTERNATIVE( 03c3fcd9941a17 Greentime Hu 2023-06-05 160 ".option push\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 161 ".option arch, +v\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 @162 "vsetvl x0, %2, %1\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 163 ".option pop\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 164 "csrw " __stringify(CSR_VSTART) ", %0\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 165 "csrw " __stringify(CSR_VCSR) ", %3\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 166 __nops(6), 5255e253b722bb Heiko Stuebner 2023-06-23 167 "csrs sstatus, t1\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 168 ".option push\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 @169 ".option arch, +v\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 170 "vsetvl x0, %2, %1\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 171 ".option pop\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 172 "csrw " __stringify(CSR_VSTART) ", %0\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 173 "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 174 "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 175 "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 176 "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 177 "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 178 "csrc sstatus, t1\n\t", 5255e253b722bb Heiko Stuebner 2023-06-23 179 THEAD_VENDOR_ID, 5255e253b722bb Heiko Stuebner 2023-06-23 180 ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) 03c3fcd9941a17 Greentime Hu 2023-06-05 181 : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl), 5255e253b722bb Heiko Stuebner 2023-06-23 182 "r" (src->vcsr), "r"(t1) : "t4"); 03c3fcd9941a17 Greentime Hu 2023-06-05 183 } 03c3fcd9941a17 Greentime Hu 2023-06-05 
184 03c3fcd9941a17 Greentime Hu 2023-06-05 185 static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, 03c3fcd9941a17 Greentime Hu 2023-06-05 186 void *datap) 03c3fcd9941a17 Greentime Hu 2023-06-05 187 { 03c3fcd9941a17 Greentime Hu 2023-06-05 188 unsigned long vl; 03c3fcd9941a17 Greentime Hu 2023-06-05 189 03c3fcd9941a17 Greentime Hu 2023-06-05 190 riscv_v_enable(); 03c3fcd9941a17 Greentime Hu 2023-06-05 191 __vstate_csr_save(save_to); 5255e253b722bb Heiko Stuebner 2023-06-23 192 asm volatile (ALTERNATIVE( 5255e253b722bb Heiko Stuebner 2023-06-23 193 "nop\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 @194 ".option push\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 195 ".option arch, +v\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 196 "vsetvli %0, x0, e8, m8, ta, ma\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 197 "vse8.v v0, (%1)\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 198 "add %1, %1, %0\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 199 "vse8.v v8, (%1)\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 200 "add %1, %1, %0\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 201 "vse8.v v16, (%1)\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 202 "add %1, %1, %0\n\t" 03c3fcd9941a17 Greentime Hu 2023-06-05 203 "vse8.v v24, (%1)\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 204 ".option pop\n\t", 5255e253b722bb Heiko Stuebner 2023-06-23 205 "mv t0, %1\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 206 THEAD_VSETVLI_T4X0E8M8D1 5255e253b722bb Heiko Stuebner 2023-06-23 207 THEAD_VSB_V_V0T0 5255e253b722bb Heiko Stuebner 2023-06-23 208 "addi t0, t0, 128\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 209 THEAD_VSB_V_V8T0 5255e253b722bb Heiko Stuebner 2023-06-23 210 "addi t0, t0, 128\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 211 THEAD_VSB_V_V16T0 5255e253b722bb Heiko Stuebner 2023-06-23 212 "addi t0, t0, 128\n\t" 5255e253b722bb Heiko Stuebner 2023-06-23 213 THEAD_VSB_V_V24T0, THEAD_VENDOR_ID, 5255e253b722bb Heiko Stuebner 2023-06-23 214 ERRATA_THEAD_VECTOR, 
CONFIG_ERRATA_THEAD_VECTOR) 5255e253b722bb Heiko Stuebner 2023-06-23 215 : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory"); 03c3fcd9941a17 Greentime Hu 2023-06-05 216 riscv_v_disable(); 03c3fcd9941a17 Greentime Hu 2023-06-05 217 } 03c3fcd9941a17 Greentime Hu 2023-06-05 218
On Fri, Jun 23, 2023, at 6:40 AM, Heiko Stübner wrote: > Hey Conor, > > Am Freitag, 23. Juni 2023, 11:49:41 CEST schrieb Conor Dooley: >> On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote: >> > From: Heiko Stuebner <heiko.stuebner@vrull.eu> >> > >> > T-Head C9xx cores implement an older version (0.7.1) of the vector >> > specification. >> > >> > Relevant changes concerning the kernel are: >> > - different placement of the SR_VS bit for the vector unit status >> > - different encoding of the vsetvli instruction >> > - different instructions for loads and stores >> > >> > And a fixed VLEN of 128. >> > >> > The in-kernel access to vector instances is limited to the save and >> > restore of process states so the above mentioned areas can simply be >> > handled via the alternatives framework, similar to other T-Head specific >> > issues. >> > >> > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> >> > --- >> > arch/riscv/Kconfig.errata | 13 +++ >> > arch/riscv/errata/thead/errata.c | 32 ++++++ >> > arch/riscv/include/asm/csr.h | 24 ++++- >> > arch/riscv/include/asm/errata_list.h | 45 ++++++++- >> > arch/riscv/include/asm/vector.h | 139 +++++++++++++++++++++++++-- >> > arch/riscv/kernel/vector.c | 2 +- >> > 6 files changed, 238 insertions(+), 17 deletions(-) >> > >> > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata >> > index 0c8f4652cd82..b461312dd452 100644 >> > --- a/arch/riscv/Kconfig.errata >> > +++ b/arch/riscv/Kconfig.errata >> > @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU >> > >> > If you don't know what to do here, say "Y". >> > >> > +config ERRATA_THEAD_VECTOR >> > + bool "Apply T-Head Vector errata" >> > + depends on ERRATA_THEAD && RISCV_ISA_V >> > + default y >> > + help >> > + The T-Head C9xx cores implement an earlier version 0.7.1 >> > + of the vector extensions. >> > + >> > + This will apply the necessary errata to handle the non-standard >> > + behaviour via when switch to and from vector mode for processes. 
Doesn't make sense. "This will apply the necessary errata to handle the non-standard behavior when enabling, disabling, or swapping vector state for processes."? >> > + >> > + If you don't know what to do here, say "Y". >> > + >> > endmenu # "CPU errata selection" >> > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c >> > index c259dc925ec1..c41ec84bc8a5 100644 >> > --- a/arch/riscv/errata/thead/errata.c >> > +++ b/arch/riscv/errata/thead/errata.c >> > @@ -15,6 +15,7 @@ >> > #include <asm/errata_list.h> >> > #include <asm/hwprobe.h> >> > #include <asm/patch.h> >> > +#include <asm/vector.h> >> > #include <asm/vendorid_list.h> >> > >> > static bool errata_probe_pbmt(unsigned int stage, >> > @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage, >> > return true; >> > } >> > >> > +static bool errata_probe_vector(unsigned int stage, >> > + unsigned long arch_id, unsigned long impid) >> > +{ >> > + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR)) >> > + return false; >> > + >> > + /* target-c9xx cores report arch_id and impid as 0 */ >> > + if (arch_id != 0 || impid != 0) >> > + return false; >> > + >> > + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) { >> > + /* >> > + * Disable VECTOR to detect illegal usage of vector in kernel. >> > + * This is normally done in _start_kernel but with the >> > + * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's >> > + * vector-0.7.1 and the vector-1.0-bits are unused there. >> > + */ >> > + csr_clear(CSR_STATUS, SR_VS_THEAD); >> > + return false; >> > + } >> > + >> > + /* let has_vector() return true and set the static vlen */ >> >> Hmm, I was wondering about how you were going to communicate this to >> userspace, since you're not going to be setting "v" in your DT, so >> there'll be nothing in /proc/cpuinfo indicating it. 
(I am assuming that >> this is your intention, as you'd not need to drop the alternative-based >> stuff from has_vector() if it wasn't) > > I'm working on the assumption that the t-head vector is way to different > from the official vector, that a userspace will definitly need to handle this > in some way specially and we can't claim to use a "real" vector spec. > > So in this first step, my goal is to simply allow userspace programs > compiled to use the t-head vector instructions (i.e. 0.7.1 presumably) to > not hang the kernel and do all the necessary bringup and teardown needed > for executing those vector instructions ;-) . > > >> I don't think you can do this, as things stand, because of how hwprobe >> operates: >> >> static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, >> const struct cpumask *cpus) >> { >> ... >> >> if (has_vector()) >> pair->value |= RISCV_HWPROBE_IMA_V; >> >> ... >> } >> >> * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by >> version 1.0 of the RISC-V Vector extension manual. >> >> You'll need to change hwprobe to use has_vector() && >> riscv_has_extension_unlikely(v), or similar, as the condition for >> reporting. > > ah right, and yes I need to adapt hwprobe as you wrote. > > >> You'll also need some other way to communicate to userspace >> that T-Head's vector stuff is supported, no? > > As I said above, baby-steps - not-dying first ;-) . (Count me as a vote for adding a new RISCV_HWPROBE_KEY_ - ints are cheap and apart from the half-implemented heterogenous case, the only advantage of hwprobe over hwcap is that we can support a virtually unlimited number of draft and vendor extensions.) >> I'm also _really_ unconvinced that turning on extensions that were not >> indicated in the DT or via ACPI is something we should be doing. Have I >> missed something here that'd make that assessment inaccurate? 
> > Hmm, DT (and ACPI) is a (static) hardware-description, not a configuration > space (sermon of DT maintainers for years), so the ISA string in DT will > simply describe _all_ extensions the hardware supports. So there _should_ > never be a case of "I want to disable vectors and will remove the letter > from the ISA string". > > For T-Head we _know_ from vendor-id and friends that the core supports > this special brand of vectors. C906 supports t-head/0.7.1 vectors as a configuration option. The C906 in the D1 and BL808 has vectors, the recently announced CV1800B has one C906 with vectors and one without, and I vaguely remember seeing a chip with only a non-vector C906. C908 (announced, no manual yet) claims V 1.0 support. Presumably it will not support 0.7.1. C910 (exists on evaluation boards) lacks vector support. C920 (TH1520, SG2042, etc) has 0.7.1 support, at least superficially compatible with C906-with-vectors. Hopefully we can share errata. This probably needs to be handled as an orthogonal "xtheadv" or "v0p7p1" extension in whatever replaces riscv,isa. > We're also turning on the t-head equivalent of svpbmt and zicbom with > probably the same reasoning. In an ideal world those would be handled as extensions as well - T-Head fixed their vectors with the C908 so they might do standards-compliant Svpbmt and Zicbom in the future. >> FWIW I am currently working on kernel-side support for the new >> extension properties that I have posted dt-binding patches for. >> I'll go post it once Palmer has merged his current set of patches in his >> staging repo into for-next, as I've got a lot of deps on riscv,isa >> parser changes. >> I'm really hoping that it provides an easier way to go off probing for >> vendor specific stuff for DT-based systems, since it will no longer >> require complex probing, just an of_property_match_string() for each >> possible cpu and we could very well provide a vendor hook during that >> process. 
>> Clearly though, that stuff is not yet merged as it has not even been >> posted yet. > > And with the comments I received, T-Head vector also is not ready for > prime-time yet, so we're all good :-) > > > Heiko > > >> Current WIP of that is here: >> https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/log/?h=riscv-extensions-strings-on-palmer >> >> > + riscv_vector_supported(); >> > + riscv_v_vsize = 128 / 8 * 32; >> > + >> > + return true; >> > +} >> > + >> > static u32 thead_errata_probe(unsigned int stage, >> > unsigned long archid, unsigned long impid) >> > { >> > @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage, >> > if (errata_probe_pmu(stage, archid, impid)) >> > cpu_req_errata |= BIT(ERRATA_THEAD_PMU); >> > >> > + if (errata_probe_vector(stage, archid, impid)) >> > + cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR); >> > + >> > return cpu_req_errata; >> > } >> > >> > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h >> > index 2d79bca6ffe8..521b3b939e51 100644 >> > --- a/arch/riscv/include/asm/csr.h >> > +++ b/arch/riscv/include/asm/csr.h >> > @@ -24,11 +24,25 @@ >> > #define SR_FS_CLEAN _AC(0x00004000, UL) >> > #define SR_FS_DIRTY _AC(0x00006000, UL) >> > >> > -#define SR_VS _AC(0x00000600, UL) /* Vector Status */ >> > -#define SR_VS_OFF _AC(0x00000000, UL) >> > -#define SR_VS_INITIAL _AC(0x00000200, UL) >> > -#define SR_VS_CLEAN _AC(0x00000400, UL) >> > -#define SR_VS_DIRTY _AC(0x00000600, UL) >> > +#define SR_VS_OFF _AC(0x00000000, UL) >> > + >> > +#define SR_VS_1_0 _AC(0x00000600, UL) /* Vector Status */ >> > +#define SR_VS_INITIAL_1_0 _AC(0x00000200, UL) >> > +#define SR_VS_CLEAN_1_0 _AC(0x00000400, UL) >> > +#define SR_VS_DIRTY_1_0 _AC(0x00000600, UL) >> > + >> > +#define SR_VS_THEAD _AC(0x01800000, UL) /* Vector Status */ >> > +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL) >> > +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL) >> > +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL) >> > + >> > +/* >> > + * 
Always default to vector-1.0 handling in assembly and let the broken >> > + * implementations handle their case separately. >> > + */ >> > +#ifdef __ASSEMBLY__ >> > +#define SR_VS SR_VS_1_0 >> > +#endif >> > >> > #define SR_XS _AC(0x00018000, UL) /* Extension Status */ >> > #define SR_XS_OFF _AC(0x00000000, UL) >> > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h >> > index fb1a810f3d8c..ab21fadbe9c6 100644 >> > --- a/arch/riscv/include/asm/errata_list.h >> > +++ b/arch/riscv/include/asm/errata_list.h >> > @@ -21,7 +21,8 @@ >> > #define ERRATA_THEAD_PBMT 0 >> > #define ERRATA_THEAD_CMO 1 >> > #define ERRATA_THEAD_PMU 2 >> > -#define ERRATA_THEAD_NUMBER 3 >> > +#define ERRATA_THEAD_VECTOR 3 >> > +#define ERRATA_THEAD_NUMBER 4 >> > #endif >> > >> > #ifdef __ASSEMBLY__ >> > @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE( \ >> > : "=r" (__ovl) : \ >> > : "memory") >> > >> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR >> > + >> > +#define THEAD_C9XX_CSR_VXSAT 0x9 >> > +#define THEAD_C9XX_CSR_VXRM 0xa These CSR numbers haven't changed. Assuming that we actually need to handle vxsat and vxrm as part of the vector state (if t-head decided to make them controlled by sstatus.VS despite being in fcsr), why not unconditionally define CSR_VXSAT and CSR_VXRM in csr.h? >> > + >> > +/* >> > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older >> > + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for >> > + * vsetvli t4, x0, e8, m8, d1 >> > + */ >> > +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t" >> > + >> > +/* >> > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same >> > + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize >> > + * the call resulting in a different encoding and then using a value for >> > + * the "mop" field that is not part of vector-0.7.1 >> > + * So encode specific variants for vstate_save and _restore. 
>> > + */ >> > +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t" >> > +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t" >> > +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t" >> > +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t" >> > +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t" >> > +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t" >> > +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t" >> > +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t" .insn isn't supported by the kernel's minimum binutils version, but it _is_ supported by the oldest version of binutils that can assemble rvv 1.0 instructions. OP_V requires 2.39 so I use a literal 0x57 instead. very untested, and I leave it to your judgement whether it actually improves readability: #define THEAD_VSETVLI_T4X0E8M8D1 ".insn i 0x57, 7, t4, x0, 3\n\t" #define THEAD_VSB_V_V0T0 ".insn r STORE_FP, 0, 1, x0, t0, x0\n\t" #define THEAD_VSB_V_V8T0 ".insn r STORE_FP, 0, 1, x8, t0, x0\n\t" #define THEAD_VSB_V_V16T0 ".insn r STORE_FP, 0, 1, x16, t0, x0\n\t" #define THEAD_VSB_V_V24T0 ".insn r STORE_FP, 0, 1, x24, t0, x0\n\t" #define THEAD_VSB_V_V0T0 ".insn r LOAD_FP, 0, 9, x0, t0, x0\n\t" #define THEAD_VSB_V_V8T0 ".insn r LOAD_FP, 0, 9, x8, t0, x0\n\t" #define THEAD_VSB_V_V16T0 ".insn r LOAD_FP, 0, 9, x16, t0, x0\n\t" #define THEAD_VSB_V_V24T0 ".insn r LOAD_FP, 0, 9, x24, t0, x0\n\t" >> > + >> > +#define ALT_SR_VS_VECTOR_1_0_SHIFT 9 >> > +#define ALT_SR_VS_THEAD_SHIFT 23 >> > + >> > +#define ALT_SR_VS(_val, prot) \ >> > +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3", \ >> > + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \ >> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \ >> > + : "=r"(_val) \ >> > + : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT), \ >> > + "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT), \ >> > + "I"(ALT_SR_VS_VECTOR_1_0_SHIFT), \ >> > + "I"(ALT_SR_VS_THEAD_SHIFT)) I think this can be simplified by removing the shifts and using the li pseudoinstruction (which will become lui on the _THEAD_ arm). 
>> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ >> > + >> > #endif /* __ASSEMBLY__ */ >> > >> > #endif >> > diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h >> > index 315c96d2b4d0..fa47f60f81e3 100644 >> > --- a/arch/riscv/include/asm/vector.h >> > +++ b/arch/riscv/include/asm/vector.h >> > @@ -18,6 +18,55 @@ >> > #include <asm/hwcap.h> >> > #include <asm/csr.h> >> > #include <asm/asm.h> >> > +#include <asm/errata_list.h> >> > + >> > +#ifdef CONFIG_ERRATA_THEAD_VECTOR >> > + >> > +static inline unsigned long riscv_sr_vs(void) >> > +{ >> > + u32 val; >> > + >> > + ALT_SR_VS(val, SR_VS); >> > + return val; >> > +} >> > + >> > +static inline unsigned long riscv_sr_vs_initial(void) >> > +{ >> > + u32 val; >> > + >> > + ALT_SR_VS(val, SR_VS_INITIAL); >> > + return val; >> > +} >> > + >> > +static inline unsigned long riscv_sr_vs_clean(void) >> > +{ >> > + u32 val; >> > + >> > + ALT_SR_VS(val, SR_VS_CLEAN); >> > + return val; >> > +} >> > + >> > +static inline unsigned long riscv_sr_vs_dirty(void) >> > +{ >> > + u32 val; >> > + >> > + ALT_SR_VS(val, SR_VS_DIRTY); >> > + return val; >> > +} >> > + >> > +#define SR_VS riscv_sr_vs() >> > +#define SR_VS_INITIAL riscv_sr_vs_initial() >> > +#define SR_VS_CLEAN riscv_sr_vs_clean() >> > +#define SR_VS_DIRTY riscv_sr_vs_dirty() >> > + >> > +#else /* CONFIG_ERRATA_THEAD_VECTOR */ >> > + >> > +#define SR_VS SR_VS_1_0 >> > +#define SR_VS_INITIAL SR_VS_INITIAL_1_0 >> > +#define SR_VS_CLEAN SR_VS_CLEAN_1_0 >> > +#define SR_VS_DIRTY SR_VS_DIRTY_1_0 >> > + >> > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ >> > >> > extern bool riscv_v_supported; >> > void riscv_vector_supported(void); >> > @@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void) >> > >> > static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) >> > { >> > - asm volatile ( >> > + register u32 t1 asm("t1") = (SR_FS); >> > + >> > + /* >> > + * CSR_VCSR is defined as >> > + * [2:1] - vxrm[1:0] >> > + * [0] - 
vxsat >> > + * The earlier vector spec implemented by T-Head uses separate >> > + * registers for the same bit-elements, so just combine those >> > + * into the existing output field. >> > + * >> > + * Additionally T-Head cores need FS to be enabled when accessing >> > + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions. >> > + * Though the cores do not implement the VXRM and VXSAT fields in the >> > + * FCSR CSR that vector-0.7.1 specifies. (for completeness only: this was found to be inaccurate in the previous thread) >> > + */ >> > + asm volatile (ALTERNATIVE( >> > "csrr %0, " __stringify(CSR_VSTART) "\n\t" >> > "csrr %1, " __stringify(CSR_VTYPE) "\n\t" >> > "csrr %2, " __stringify(CSR_VL) "\n\t" >> > "csrr %3, " __stringify(CSR_VCSR) "\n\t" >> > + __nops(5), >> > + "csrs sstatus, t1\n\t" >> > + "csrr %0, " __stringify(CSR_VSTART) "\n\t" >> > + "csrr %1, " __stringify(CSR_VTYPE) "\n\t" >> > + "csrr %2, " __stringify(CSR_VL) "\n\t" >> > + "csrr %3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t" >> > + "slliw %3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" >> > + "csrr t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t" >> > + "or %3, %3, t4\n\t" >> > + "csrc sstatus, t1\n\t", >> > + THEAD_VENDOR_ID, >> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) >> > : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), >> > - "=r" (dest->vcsr) : :); >> > + "=r" (dest->vcsr) : "r"(t1) : "t4"); >> > } >> > >> > static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) >> > { >> > - asm volatile ( >> > + register u32 t1 asm("t1") = (SR_FS); >> > + >> > + /* >> > + * Similar to __vstate_csr_save above, restore values for the >> > + * separate VXRM and VXSAT CSRs from the vcsr variable. 
>> > + */ >> > + asm volatile (ALTERNATIVE( >> > ".option push\n\t" >> > ".option arch, +v\n\t" >> > "vsetvl x0, %2, %1\n\t" >> > ".option pop\n\t" >> > "csrw " __stringify(CSR_VSTART) ", %0\n\t" >> > "csrw " __stringify(CSR_VCSR) ", %3\n\t" >> > + __nops(6), >> > + "csrs sstatus, t1\n\t" >> > + ".option push\n\t" >> > + ".option arch, +v\n\t" >> > + "vsetvl x0, %2, %1\n\t" >> > + ".option pop\n\t" >> > + "csrw " __stringify(CSR_VSTART) ", %0\n\t" >> > + "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" >> > + "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t" >> > + "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t" >> > + "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t" >> > + "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t" >> > + "csrc sstatus, t1\n\t", >> > + THEAD_VENDOR_ID, >> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) >> > : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl), >> > - "r" (src->vcsr) :); >> > + "r" (src->vcsr), "r"(t1) : "t4"); >> > } >> > >> > static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, >> > @@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, >> > >> > riscv_v_enable(); >> > __vstate_csr_save(save_to); >> > - asm volatile ( >> > + asm volatile (ALTERNATIVE( >> > + "nop\n\t" >> > ".option push\n\t" >> > ".option arch, +v\n\t" >> > "vsetvli %0, x0, e8, m8, ta, ma\n\t" >> > @@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, >> > "vse8.v v16, (%1)\n\t" >> > "add %1, %1, %0\n\t" >> > "vse8.v v24, (%1)\n\t" >> > - ".option pop\n\t" >> > - : "=&r" (vl) : "r" (datap) : "memory"); Pre-existing bug: The assembly code modifies %1, which is an input operand. 
I think this should be - : "=&r" (vl), "+r" (datap) : : "memory"); >> > + ".option pop\n\t", >> > + "mv t0, %1\n\t" >> > + THEAD_VSETVLI_T4X0E8M8D1 >> > + THEAD_VSB_V_V0T0 >> > + "addi t0, t0, 128\n\t" We don't have a promise from T-Head that they won't introduce a core with 0.7.1 vectors and VLEN=256, and I'd rather not have code lying around that will cause silent data corruption if that happens. THEAD_VSETVLI_T4X0E8M8D1 has rd=t4 so adding t4 should work in this arm. >> > + THEAD_VSB_V_V8T0 >> > + "addi t0, t0, 128\n\t" >> > + THEAD_VSB_V_V16T0 >> > + "addi t0, t0, 128\n\t" >> > + THEAD_VSB_V_V24T0, THEAD_VENDOR_ID, >> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) >> > + : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory"); The bugfix for the existing code isn't needed here since you copied the address into t0. >> > riscv_v_disable(); >> > } >> > >> > @@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ >> > unsigned long vl; >> > >> > riscv_v_enable(); >> > - asm volatile ( >> > + asm volatile (ALTERNATIVE( >> > + "nop\n\t" >> > ".option push\n\t" >> > ".option arch, +v\n\t" >> > "vsetvli %0, x0, e8, m8, ta, ma\n\t" >> > @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ >> > "vle8.v v16, (%1)\n\t" >> > "add %1, %1, %0\n\t" >> > "vle8.v v24, (%1)\n\t" >> > - ".option pop\n\t" >> > - : "=&r" (vl) : "r" (datap) : "memory"); - : "=&r" (vl), "+r" (datap) : : "memory"); -s >> > + ".option pop\n\t", >> > + "mv t0, %1\n\t" >> > + THEAD_VSETVLI_T4X0E8M8D1 >> > + THEAD_VLB_V_V0T0 >> > + "addi t0, t0, 128\n\t" >> > + THEAD_VLB_V_V8T0 >> > + "addi t0, t0, 128\n\t" >> > + THEAD_VLB_V_V16T0 >> > + "addi t0, t0, 128\n\t" >> > + THEAD_VLB_V_V24T0, THEAD_VENDOR_ID, >> > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) >> > + : "=&r" (vl) : "r" (datap) : "t0", "t4"); >> > __vstate_csr_restore(restore_from); >> > riscv_v_disable(); >> > } >> > diff --git 
a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c >> > index 74178fb71805..51726890a4d0 100644 >> > --- a/arch/riscv/kernel/vector.c >> > +++ b/arch/riscv/kernel/vector.c >> > @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) >> > u32 insn = (u32)regs->badaddr; >> > >> > /* Do not handle if V is not supported, or disabled */ >> > - if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V)) >> > + if (!has_vector()) >> > return false; >> > >> > /* If V has been enabled then it is not the first-use trap */ >> > > > > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
On Sat, Jun 24, 2023 at 01:18:26AM -0400, Stefan O'Rear wrote: > On Fri, Jun 23, 2023, at 6:40 AM, Heiko Stübner wrote: ... > >> > + > >> > +/* > >> > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older > >> > + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for > >> > + * vsetvli t4, x0, e8, m8, d1 > >> > + */ > >> > +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t" > >> > + > >> > +/* > >> > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same > >> > + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize > >> > + * the call resulting in a different encoding and then using a value for > >> > + * the "mop" field that is not part of vector-0.7.1 > >> > + * So encode specific variants for vstate_save and _restore. > >> > + */ > >> > +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t" > >> > +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t" > >> > +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t" > >> > +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t" > >> > +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t" > >> > +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t" > >> > +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t" > >> > +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t" > > .insn isn't supported by the kernel's minimum binutils version, but it _is_ > supported by the oldest version of binutils that can assemble rvv 1.0 > instructions. OP_V requires 2.39 so I use a literal 0x57 instead. 
> > very untested, and I leave it to your judgement whether it actually improves > readability: > > #define THEAD_VSETVLI_T4X0E8M8D1 ".insn i 0x57, 7, t4, x0, 3\n\t" > #define THEAD_VSB_V_V0T0 ".insn r STORE_FP, 0, 1, x0, t0, x0\n\t" > #define THEAD_VSB_V_V8T0 ".insn r STORE_FP, 0, 1, x8, t0, x0\n\t" > #define THEAD_VSB_V_V16T0 ".insn r STORE_FP, 0, 1, x16, t0, x0\n\t" > #define THEAD_VSB_V_V24T0 ".insn r STORE_FP, 0, 1, x24, t0, x0\n\t" > #define THEAD_VSB_V_V0T0 ".insn r LOAD_FP, 0, 9, x0, t0, x0\n\t" > #define THEAD_VSB_V_V8T0 ".insn r LOAD_FP, 0, 9, x8, t0, x0\n\t" > #define THEAD_VSB_V_V16T0 ".insn r LOAD_FP, 0, 9, x16, t0, x0\n\t" > #define THEAD_VSB_V_V24T0 ".insn r LOAD_FP, 0, 9, x24, t0, x0\n\t" > We have the INSN_R() macro in arch/riscv/include/asm/insn-def.h for stuff like this. Thanks, drew
Le perjantaina 23. kesäkuuta 2023, 2.13.05 EEST Heiko Stuebner a écrit : > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > T-Head C9xx cores implement an older version (0.7.1) of the vector > specification. > > Relevant changes concerning the kernel are: > - different placement of the SR_VS bit for the vector unit status > - different encoding of the vsetvli instruction > - different instructions for loads and stores > > And a fixed VLEN of 128. Ultimately, conformant hardware also has a fixed VLEN of some value. So why is that relevant here? Is the vlenb CSR not implemented? And even if so, can the value not be retrieved with vsetvli?
On Tue, Jun 27, 2023 at 06:21:14PM +0300, Rémi Denis-Courmont wrote: > Le perjantaina 23. kesäkuuta 2023, 2.13.05 EEST Heiko Stuebner a écrit : > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > T-Head C9xx cores implement an older version (0.7.1) of the vector > > specification. > > > > Relevant changes concerning the kernel are: > > - different placement of the SR_VS bit for the vector unit status > > - different encoding of the vsetvli instruction > > - different instructions for loads and stores > > > > And a fixed VLEN of 128. > > Ultimately, conformant hardware also has a fixed VLEN of some value. > > So why is that relevant here? is the vlenb CSR not implemented? And even if > so, c the value not be retrieved with vsetvli? I was looking around a bit, and saw a random comment on reddit today claiming that the c920 has a vlen of 256. Obviously that conflicts with what is written here, but it is reddit... Do you know if that is true Heiko, and if it is true, does the c920 populate archid/impid with non-zero values? Cheers, Conor.
On Tue, 27 Jun 2023 09:12:48 PDT (-0700), Conor Dooley wrote: > On Tue, Jun 27, 2023 at 06:21:14PM +0300, Rémi Denis-Courmont wrote: >> Le perjantaina 23. kesäkuuta 2023, 2.13.05 EEST Heiko Stuebner a écrit : >> > From: Heiko Stuebner <heiko.stuebner@vrull.eu> >> > >> > T-Head C9xx cores implement an older version (0.7.1) of the vector >> > specification. >> > >> > Relevant changes concerning the kernel are: >> > - different placement of the SR_VS bit for the vector unit status >> > - different encoding of the vsetvli instruction >> > - different instructions for loads and stores >> > >> > And a fixed VLEN of 128. >> >> Ultimately, conformant hardware also has a fixed VLEN of some value. >> >> So why is that relevant here? is the vlenb CSR not implemented? And even if >> so, c the value not be retrieved with vsetvli? > > I was looking around a bit, and saw a random comment on reddit today > claiming that the c920 has a vlen of 256. Obviously that conflicts with > what is written here, but it is reddit... > Do you know if that is true Heiko, and if it is true, does the c920 > populate archid/impid with non-zero values? We were talking in the patchwork a bit, it looks like there's already some aliasing in the T-Head implementations where the same marchid/mvendorid/mimpid tuple has different extensions. At that point we can't really detect based on the CSRs, so it sort of doesn't matter what the other stuff does (at least around V, if there's some other errata that's uniquely identified by the tuple then we can deal with it). So I think we need to add a way of indicating the hardware supports T-Head V from the device tree. I'd asked Charlie to add hwprobe support for the T-Head V stuff, LMK if we should pick up the DT side of things as well? > > Cheers, > Conor. > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
On Fri, Jun 23, 2023 at 5:53 PM Conor Dooley <conor.dooley@microchip.com> wrote: > > Hey Heiko, > > On Fri, Jun 23, 2023 at 01:13:05AM +0200, Heiko Stuebner wrote: > > From: Heiko Stuebner <heiko.stuebner@vrull.eu> > > > > T-Head C9xx cores implement an older version (0.7.1) of the vector > > specification. > > > > Relevant changes concerning the kernel are: > > - different placement of the SR_VS bit for the vector unit status > > - different encoding of the vsetvli instruction > > - different instructions for loads and stores > > > > And a fixed VLEN of 128. > > > > The in-kernel access to vector instances is limited to the save and > > restore of process states so the above mentioned areas can simply be > > handled via the alternatives framework, similar to other T-Head specific > > issues. > > > > Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu> > > --- > > arch/riscv/Kconfig.errata | 13 +++ > > arch/riscv/errata/thead/errata.c | 32 ++++++ > > arch/riscv/include/asm/csr.h | 24 ++++- > > arch/riscv/include/asm/errata_list.h | 45 ++++++++- > > arch/riscv/include/asm/vector.h | 139 +++++++++++++++++++++++++-- > > arch/riscv/kernel/vector.c | 2 +- > > 6 files changed, 238 insertions(+), 17 deletions(-) > > > > diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata > > index 0c8f4652cd82..b461312dd452 100644 > > --- a/arch/riscv/Kconfig.errata > > +++ b/arch/riscv/Kconfig.errata > > @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU > > > > If you don't know what to do here, say "Y". > > > > +config ERRATA_THEAD_VECTOR > > + bool "Apply T-Head Vector errata" > > + depends on ERRATA_THEAD && RISCV_ISA_V > > + default y > > + help > > + The T-Head C9xx cores implement an earlier version 0.7.1 > > + of the vector extensions. > > + > > + This will apply the necessary errata to handle the non-standard > > + behaviour via when switch to and from vector mode for processes. > > + > > + If you don't know what to do here, say "Y". 
> > + > > endmenu # "CPU errata selection" > > diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c > > index c259dc925ec1..c41ec84bc8a5 100644 > > --- a/arch/riscv/errata/thead/errata.c > > +++ b/arch/riscv/errata/thead/errata.c > > @@ -15,6 +15,7 @@ > > #include <asm/errata_list.h> > > #include <asm/hwprobe.h> > > #include <asm/patch.h> > > +#include <asm/vector.h> > > #include <asm/vendorid_list.h> > > > > static bool errata_probe_pbmt(unsigned int stage, > > @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage, > > return true; > > } > > > > +static bool errata_probe_vector(unsigned int stage, > > + unsigned long arch_id, unsigned long impid) > > +{ > > + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR)) > > + return false; > > + > > + /* target-c9xx cores report arch_id and impid as 0 */ > > + if (arch_id != 0 || impid != 0) > > + return false; > > + > > + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) { > > + /* > > + * Disable VECTOR to detect illegal usage of vector in kernel. > > + * This is normally done in _start_kernel but with the > > + * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's > > + * vector-0.7.1 and the vector-1.0-bits are unused there. > > + */ > > + csr_clear(CSR_STATUS, SR_VS_THEAD); > > + return false; > > + } > > + > > + /* let has_vector() return true and set the static vlen */ > > Hmm, I was wondering about how you were going to communicate this to > userspace, since you're not going to be setting "v" in your DT, so > there'll be nothing in /proc/cpuinfo indicating it. (I am assuming that > this is your intention, as you'd not need to drop the alternative-based > stuff from has_vector() if it wasn't) > > I don't think you can do this, as things stand, because of how hwprobe > operates: > > static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, > const struct cpumask *cpus) > { > ... > > if (has_vector()) > pair->value |= RISCV_HWPROBE_IMA_V; > > ... 
> } > > * :c:macro:`RISCV_HWPROBE_IMA_V`: The V extension is supported, as defined by > version 1.0 of the RISC-V Vector extension manual. > > You'll need to change hwprobe to use has_vector() && > riscv_has_extension_unlikely(v), or similar, as the condition for > reporting. You'll also need some other way to communicate to userspace > that T-Head's vector stuff is supported, no? > > I'm also _really_ unconvinced that turning on extensions that were not > indicated in the DT or via ACPI is something we should be doing. Have I > missed something here that'd make that assessment inaccurate? > > Cheers, > Conor. > > FWIW I am currently working on kernel-side support for the new > extension properties that I have posted dt-binding patches for. > I'll go post it once Palmer has merged his current set of patches in his > staging repo into for-next, as I've got a lot of deps on riscv,isa > parser changes. > I'm really hoping that it provides an easier way to go off probing for > vendor specific stuff for DT-based systems, since it will no longer > require complex probing, just an of_property_match_string() for each > possible cpu and we could very well provide a vendor hook during that > process. > Clearly though, that stuff is not yet merged as it has not even been > posted yet. 
> > Current WIP of that is here: > https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/log/?h=riscv-extensions-strings-on-palmer > > > + riscv_vector_supported(); > > + riscv_v_vsize = 128 / 8 * 32; > > + > > + return true; > > +} > > + > > static u32 thead_errata_probe(unsigned int stage, > > unsigned long archid, unsigned long impid) > > { > > @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage, > > if (errata_probe_pmu(stage, archid, impid)) > > cpu_req_errata |= BIT(ERRATA_THEAD_PMU); > > > > + if (errata_probe_vector(stage, archid, impid)) > > + cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR); > > + > > return cpu_req_errata; > > } > > > > diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h > > index 2d79bca6ffe8..521b3b939e51 100644 > > --- a/arch/riscv/include/asm/csr.h > > +++ b/arch/riscv/include/asm/csr.h > > @@ -24,11 +24,25 @@ > > #define SR_FS_CLEAN _AC(0x00004000, UL) > > #define SR_FS_DIRTY _AC(0x00006000, UL) > > > > -#define SR_VS _AC(0x00000600, UL) /* Vector Status */ > > -#define SR_VS_OFF _AC(0x00000000, UL) > > -#define SR_VS_INITIAL _AC(0x00000200, UL) > > -#define SR_VS_CLEAN _AC(0x00000400, UL) > > -#define SR_VS_DIRTY _AC(0x00000600, UL) > > +#define SR_VS_OFF _AC(0x00000000, UL) > > + > > +#define SR_VS_1_0 _AC(0x00000600, UL) /* Vector Status */ > > +#define SR_VS_INITIAL_1_0 _AC(0x00000200, UL) > > +#define SR_VS_CLEAN_1_0 _AC(0x00000400, UL) > > +#define SR_VS_DIRTY_1_0 _AC(0x00000600, UL) > > + > > +#define SR_VS_THEAD _AC(0x01800000, UL) /* Vector Status */ > > +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL) > > +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL) > > +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL) > > + > > +/* > > + * Always default to vector-1.0 handling in assembly and let the broken > > + * implementations handle their case separately. 
> > + */ > > +#ifdef __ASSEMBLY__ > > +#define SR_VS SR_VS_1_0 > > +#endif > > > > #define SR_XS _AC(0x00018000, UL) /* Extension Status */ > > #define SR_XS_OFF _AC(0x00000000, UL) > > diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h > > index fb1a810f3d8c..ab21fadbe9c6 100644 > > --- a/arch/riscv/include/asm/errata_list.h > > +++ b/arch/riscv/include/asm/errata_list.h > > @@ -21,7 +21,8 @@ > > #define ERRATA_THEAD_PBMT 0 > > #define ERRATA_THEAD_CMO 1 > > #define ERRATA_THEAD_PMU 2 > > -#define ERRATA_THEAD_NUMBER 3 > > +#define ERRATA_THEAD_VECTOR 3 > > +#define ERRATA_THEAD_NUMBER 4 > > #endif > > > > #ifdef __ASSEMBLY__ > > @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE( \ > > : "=r" (__ovl) : \ > > : "memory") > > > > +#ifdef CONFIG_ERRATA_THEAD_VECTOR > > + > > +#define THEAD_C9XX_CSR_VXSAT 0x9 > > +#define THEAD_C9XX_CSR_VXRM 0xa > > + > > +/* > > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older > > + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for > > + * vsetvli t4, x0, e8, m8, d1 > > + */ > > +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t" > > + > > +/* > > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same > > + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize > > + * the call resulting in a different encoding and then using a value for > > + * the "mop" field that is not part of vector-0.7.1 > > + * So encode specific variants for vstate_save and _restore. 
> > + */ > > +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t" > > +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t" > > +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t" > > +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t" > > +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t" > > +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t" > > +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t" > > +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t" > > + > > +#define ALT_SR_VS_VECTOR_1_0_SHIFT 9 > > +#define ALT_SR_VS_THEAD_SHIFT 23 > > + > > +#define ALT_SR_VS(_val, prot) \ > > +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3", \ > > + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \ > > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \ > > + : "=r"(_val) \ > > + : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT), \ > > + "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT), \ > > + "I"(ALT_SR_VS_VECTOR_1_0_SHIFT), \ > > + "I"(ALT_SR_VS_THEAD_SHIFT)) > > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ > > + > > #endif /* __ASSEMBLY__ */ > > > > #endif > > diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h > > index 315c96d2b4d0..fa47f60f81e3 100644 > > --- a/arch/riscv/include/asm/vector.h > > +++ b/arch/riscv/include/asm/vector.h > > @@ -18,6 +18,55 @@ > > #include <asm/hwcap.h> > > #include <asm/csr.h> > > #include <asm/asm.h> > > +#include <asm/errata_list.h> > > + > > +#ifdef CONFIG_ERRATA_THEAD_VECTOR > > + > > +static inline unsigned long riscv_sr_vs(void) > > +{ > > + u32 val; > > + > > + ALT_SR_VS(val, SR_VS); > > + return val; > > +} > > + > > +static inline unsigned long riscv_sr_vs_initial(void) > > +{ > > + u32 val; > > + > > + ALT_SR_VS(val, SR_VS_INITIAL); > > + return val; > > +} > > + > > +static inline unsigned long riscv_sr_vs_clean(void) > > +{ > > + u32 val; > > + > > + ALT_SR_VS(val, SR_VS_CLEAN); > > + return val; > > +} > > + > > +static inline unsigned long riscv_sr_vs_dirty(void) > > +{ > > + u32 val; > > + > > + ALT_SR_VS(val, SR_VS_DIRTY); > 
> + return val; > > +} > > + > > +#define SR_VS riscv_sr_vs() > > +#define SR_VS_INITIAL riscv_sr_vs_initial() > > +#define SR_VS_CLEAN riscv_sr_vs_clean() > > +#define SR_VS_DIRTY riscv_sr_vs_dirty() > > + > > +#else /* CONFIG_ERRATA_THEAD_VECTOR */ > > + > > +#define SR_VS SR_VS_1_0 > > +#define SR_VS_INITIAL SR_VS_INITIAL_1_0 > > +#define SR_VS_CLEAN SR_VS_CLEAN_1_0 > > +#define SR_VS_DIRTY SR_VS_DIRTY_1_0 > > + > > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ > > > > extern bool riscv_v_supported; > > void riscv_vector_supported(void); > > @@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void) > > > > static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) > > { > > - asm volatile ( > > + register u32 t1 asm("t1") = (SR_FS); > > + > > + /* > > + * CSR_VCSR is defined as > > + * [2:1] - vxrm[1:0] > > + * [0] - vxsat > > + * The earlier vector spec implemented by T-Head uses separate > > + * registers for the same bit-elements, so just combine those > > + * into the existing output field. > > + * > > + * Additionally T-Head cores need FS to be enabled when accessing > > + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions. > > + * Though the cores do not implement the VXRM and VXSAT fields in the > > + * FCSR CSR that vector-0.7.1 specifies. 
> > + */ > > + asm volatile (ALTERNATIVE( > > "csrr %0, " __stringify(CSR_VSTART) "\n\t" > > "csrr %1, " __stringify(CSR_VTYPE) "\n\t" > > "csrr %2, " __stringify(CSR_VL) "\n\t" > > "csrr %3, " __stringify(CSR_VCSR) "\n\t" > > + __nops(5), > > + "csrs sstatus, t1\n\t" > > + "csrr %0, " __stringify(CSR_VSTART) "\n\t" > > + "csrr %1, " __stringify(CSR_VTYPE) "\n\t" > > + "csrr %2, " __stringify(CSR_VL) "\n\t" > > + "csrr %3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t" > > + "slliw %3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" > > + "csrr t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t" > > + "or %3, %3, t4\n\t" > > + "csrc sstatus, t1\n\t", > > + THEAD_VENDOR_ID, > > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) > > : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), > > - "=r" (dest->vcsr) : :); > > + "=r" (dest->vcsr) : "r"(t1) : "t4"); > > } > > > > static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) > > { > > - asm volatile ( > > + register u32 t1 asm("t1") = (SR_FS); > > + > > + /* > > + * Similar to __vstate_csr_save above, restore values for the > > + * separate VXRM and VXSAT CSRs from the vcsr variable. 
> > + */ > > + asm volatile (ALTERNATIVE( > > ".option push\n\t" > > ".option arch, +v\n\t" > > "vsetvl x0, %2, %1\n\t" > > ".option pop\n\t" > > "csrw " __stringify(CSR_VSTART) ", %0\n\t" > > "csrw " __stringify(CSR_VCSR) ", %3\n\t" > > + __nops(6), > > + "csrs sstatus, t1\n\t" > > + ".option push\n\t" > > + ".option arch, +v\n\t" > > + "vsetvl x0, %2, %1\n\t" > > + ".option pop\n\t" > > + "csrw " __stringify(CSR_VSTART) ", %0\n\t" > > + "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" > > + "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t" > > + "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t" > > + "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t" > > + "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t" > > + "csrc sstatus, t1\n\t", > > + THEAD_VENDOR_ID, > > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) > > : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl), > > - "r" (src->vcsr) :); > > + "r" (src->vcsr), "r"(t1) : "t4"); > > } > > > > static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, > > @@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, > > > > riscv_v_enable(); > > __vstate_csr_save(save_to); > > - asm volatile ( > > + asm volatile (ALTERNATIVE( > > + "nop\n\t" > > ".option push\n\t" > > ".option arch, +v\n\t" > > "vsetvli %0, x0, e8, m8, ta, ma\n\t" > > @@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, > > "vse8.v v16, (%1)\n\t" > > "add %1, %1, %0\n\t" > > "vse8.v v24, (%1)\n\t" > > - ".option pop\n\t" > > - : "=&r" (vl) : "r" (datap) : "memory"); > > + ".option pop\n\t", > > + "mv t0, %1\n\t" > > + THEAD_VSETVLI_T4X0E8M8D1 > > + THEAD_VSB_V_V0T0 > > + "addi t0, t0, 128\n\t" > > + THEAD_VSB_V_V8T0 > > + "addi t0, t0, 128\n\t" > > + THEAD_VSB_V_V16T0 > > + "addi t0, t0, 128\n\t" > > + THEAD_VSB_V_V24T0, THEAD_VENDOR_ID, > > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) > > + : "=&r" (vl) : "r" (datap) : "t0", 
"t4", "memory"); > > riscv_v_disable(); > > } > > > > @@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ > > unsigned long vl; > > > > riscv_v_enable(); > > - asm volatile ( > > + asm volatile (ALTERNATIVE( > > + "nop\n\t" > > ".option push\n\t" > > ".option arch, +v\n\t" > > "vsetvli %0, x0, e8, m8, ta, ma\n\t" > > @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ > > "vle8.v v16, (%1)\n\t" > > "add %1, %1, %0\n\t" > > "vle8.v v24, (%1)\n\t" > > - ".option pop\n\t" > > - : "=&r" (vl) : "r" (datap) : "memory"); > > + ".option pop\n\t", > > + "mv t0, %1\n\t" > > + THEAD_VSETVLI_T4X0E8M8D1 > > + THEAD_VLB_V_V0T0 > > + "addi t0, t0, 128\n\t" > > + THEAD_VLB_V_V8T0 > > + "addi t0, t0, 128\n\t" > > + THEAD_VLB_V_V16T0 > > + "addi t0, t0, 128\n\t" > > + THEAD_VLB_V_V24T0, THEAD_VENDOR_ID, > > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) > > + : "=&r" (vl) : "r" (datap) : "t0", "t4"); > > __vstate_csr_restore(restore_from); > > riscv_v_disable(); > > } > > diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c > > index 74178fb71805..51726890a4d0 100644 > > --- a/arch/riscv/kernel/vector.c > > +++ b/arch/riscv/kernel/vector.c > > @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) > > u32 insn = (u32)regs->badaddr; > > > > /* Do not handle if V is not supported, or disabled */ > > - if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V)) > > + if (!has_vector()) > > return false; riscv_v_first_use_handler() will not be able to detect if a process is running with PR_RISCV_V_VSTATE_CTRL_OFF here after applying this change IIIUC. This is the case where we disable the availability of V for a process but it still executes V instructions anyway. 
> > > > /* If V has been enabled then it is not the first-use trap */ > > -- > > 2.39.2 > > > _______________________________________________ > linux-riscv mailing list > linux-riscv@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
Hi, Le perjantaina 23. kesäkuuta 2023, 2.13.05 EEST Heiko Stuebner a écrit : > diff --git a/arch/riscv/include/asm/errata_list.h > b/arch/riscv/include/asm/errata_list.h index fb1a810f3d8c..ab21fadbe9c6 > 100644 > --- a/arch/riscv/include/asm/errata_list.h > +++ b/arch/riscv/include/asm/errata_list.h > @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE( \ > > : "=r" (__ovl) : \ > : "memory") > > +#ifdef CONFIG_ERRATA_THEAD_VECTOR > + > +#define THEAD_C9XX_CSR_VXSAT 0x9 > +#define THEAD_C9XX_CSR_VXRM 0xa > + > +/* > + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older > + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for > + * vsetvli t4, x0, e8, m8, d1 > + */ > +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t" That is equivalent to, and (IMHO) much less legible than: ".insn i OP_V, 7, t4, x0, 3" Or even if you don't mind second-guessing RVV 1.0 assemblers: "vsetvli t4, zero, e8, m8, tu, mu" Either way, you don't need to hard-code X-register operands in assembler macros (though you do unfortunately need to hard-code V register operands if you use .insn). > + > +/* > + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same > + * encoding as the standard vse8.v and vle8.v, Not only in theory. vse8.v and vle8.v have only one possible encoding each (for given operands). > compilers seem to optimize Nit: By "compilers", do you mean "assemblers"? That's a bit misleading to me. > + * the call resulting in a different encoding and then using a value for > + * the "mop" field that is not part of vector-0.7.1 Uh, no? They use mew = 0b0 and mop = 0b00, which corresponds to mop = 0b000. > + * So encode specific variants for vstate_save and _restore. > + */ > +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t" That's "vse8.v v0, (t0)", at least as assembled with binutils 2.40.50.20230625 (from Debian unstable). I don't understand the rationale for hard-coding from the above comment. 
Maybe that's just me being an idiot, but if so, then the comment ought to be clarified. (I do realise that vse8.v and vsb.v are not exactly equivalent in behaviour, but here, the concern should be the assembler, not the processor.) > +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t" > +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t" > +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t" > +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t" This has one nibble too many for a 32-bit value. And why use sign-extended loads? Zero-extended loads would have the exact same encoding as vle8.v, and not need this dark magic, AFAICT. > +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t" > +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t" > +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t" > + > +#define ALT_SR_VS_VECTOR_1_0_SHIFT 9 > +#define ALT_SR_VS_THEAD_SHIFT 23 > + > +#define ALT_SR_VS(_val, prot) \ > +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3", \ > + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \ > + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \ > + : "=r"(_val) \ > + : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT), \ > + "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT), \ > + "I"(ALT_SR_VS_VECTOR_1_0_SHIFT), \ > + "I"(ALT_SR_VS_THEAD_SHIFT)) > +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ > + > #endif /* __ASSEMBLY__ */ > > #endif
diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index 0c8f4652cd82..b461312dd452 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -77,4 +77,17 @@ config ERRATA_THEAD_PMU If you don't know what to do here, say "Y". +config ERRATA_THEAD_VECTOR + bool "Apply T-Head Vector errata" + depends on ERRATA_THEAD && RISCV_ISA_V + default y + help + The T-Head C9xx cores implement an earlier version 0.7.1 + of the vector extensions. + + This will apply the necessary errata to handle the non-standard + behaviour when switching to and from vector mode for processes. + + If you don't know what to do here, say "Y". + endmenu # "CPU errata selection" diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index c259dc925ec1..c41ec84bc8a5 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -15,6 +15,7 @@ #include <asm/errata_list.h> #include <asm/hwprobe.h> #include <asm/patch.h> +#include <asm/vector.h> #include <asm/vendorid_list.h> static bool errata_probe_pbmt(unsigned int stage, @@ -66,6 +67,34 @@ static bool errata_probe_pmu(unsigned int stage, return true; } +static bool errata_probe_vector(unsigned int stage, + unsigned long arch_id, unsigned long impid) +{ + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_VECTOR)) + return false; + + /* target-c9xx cores report arch_id and impid as 0 */ + if (arch_id != 0 || impid != 0) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) { + /* + * Disable VECTOR to detect illegal usage of vector in kernel. + * This is normally done in _start_kernel but with the + * vector-1.0 SR_VS bits. VS is using [24:23] on T-Head's + * vector-0.7.1 and the vector-1.0-bits are unused there. 
+ */ + csr_clear(CSR_STATUS, SR_VS_THEAD); + return false; + } + + /* let has_vector() return true and set the static vlen */ + riscv_vector_supported(); + riscv_v_vsize = 128 / 8 * 32; + + return true; +} + static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid) { @@ -80,6 +109,9 @@ static u32 thead_errata_probe(unsigned int stage, if (errata_probe_pmu(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_PMU); + if (errata_probe_vector(stage, archid, impid)) + cpu_req_errata |= BIT(ERRATA_THEAD_VECTOR); + return cpu_req_errata; } diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 2d79bca6ffe8..521b3b939e51 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -24,11 +24,25 @@ #define SR_FS_CLEAN _AC(0x00004000, UL) #define SR_FS_DIRTY _AC(0x00006000, UL) -#define SR_VS _AC(0x00000600, UL) /* Vector Status */ -#define SR_VS_OFF _AC(0x00000000, UL) -#define SR_VS_INITIAL _AC(0x00000200, UL) -#define SR_VS_CLEAN _AC(0x00000400, UL) -#define SR_VS_DIRTY _AC(0x00000600, UL) +#define SR_VS_OFF _AC(0x00000000, UL) + +#define SR_VS_1_0 _AC(0x00000600, UL) /* Vector Status */ +#define SR_VS_INITIAL_1_0 _AC(0x00000200, UL) +#define SR_VS_CLEAN_1_0 _AC(0x00000400, UL) +#define SR_VS_DIRTY_1_0 _AC(0x00000600, UL) + +#define SR_VS_THEAD _AC(0x01800000, UL) /* Vector Status */ +#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL) +#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL) +#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL) + +/* + * Always default to vector-1.0 handling in assembly and let the broken + * implementations handle their case separately. 
+ */ +#ifdef __ASSEMBLY__ +#define SR_VS SR_VS_1_0 +#endif #define SR_XS _AC(0x00018000, UL) /* Extension Status */ #define SR_XS_OFF _AC(0x00000000, UL) diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index fb1a810f3d8c..ab21fadbe9c6 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -21,7 +21,8 @@ #define ERRATA_THEAD_PBMT 0 #define ERRATA_THEAD_CMO 1 #define ERRATA_THEAD_PMU 2 -#define ERRATA_THEAD_NUMBER 3 +#define ERRATA_THEAD_VECTOR 3 +#define ERRATA_THEAD_NUMBER 4 #endif #ifdef __ASSEMBLY__ @@ -154,6 +155,48 @@ asm volatile(ALTERNATIVE( \ : "=r" (__ovl) : \ : "memory") +#ifdef CONFIG_ERRATA_THEAD_VECTOR + +#define THEAD_C9XX_CSR_VXSAT 0x9 +#define THEAD_C9XX_CSR_VXRM 0xa + +/* + * Vector 0.7.1 as used for example on T-Head Xuantie cores, uses an older + * encoding for vsetvli (ta, ma vs. d1), so provide an instruction for + * vsetvli t4, x0, e8, m8, d1 + */ +#define THEAD_VSETVLI_T4X0E8M8D1 ".long 0x00307ed7\n\t" + +/* + * While in theory, the vector-0.7.1 vsb.v and vlb.v result in the same + * encoding as the standard vse8.v and vle8.v, compilers seem to optimize + * the call resulting in a different encoding and then using a value for + * the "mop" field that is not part of vector-0.7.1 + * So encode specific variants for vstate_save and _restore. 
+ */ +#define THEAD_VSB_V_V0T0 ".long 0x02028027\n\t" +#define THEAD_VSB_V_V8T0 ".long 0x02028427\n\t" +#define THEAD_VSB_V_V16T0 ".long 0x02028827\n\t" +#define THEAD_VSB_V_V24T0 ".long 0x02028c27\n\t" +#define THEAD_VLB_V_V0T0 ".long 0x012028007\n\t" +#define THEAD_VLB_V_V8T0 ".long 0x012028407\n\t" +#define THEAD_VLB_V_V16T0 ".long 0x012028807\n\t" +#define THEAD_VLB_V_V24T0 ".long 0x012028c07\n\t" + +#define ALT_SR_VS_VECTOR_1_0_SHIFT 9 +#define ALT_SR_VS_THEAD_SHIFT 23 + +#define ALT_SR_VS(_val, prot) \ +asm(ALTERNATIVE("li %0, %1\t\nslli %0,%0,%3", \ + "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \ + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) \ + : "=r"(_val) \ + : "I"(prot##_1_0 >> ALT_SR_VS_VECTOR_1_0_SHIFT), \ + "I"(prot##_THEAD >> ALT_SR_VS_THEAD_SHIFT), \ + "I"(ALT_SR_VS_VECTOR_1_0_SHIFT), \ + "I"(ALT_SR_VS_THEAD_SHIFT)) +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 315c96d2b4d0..fa47f60f81e3 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -18,6 +18,55 @@ #include <asm/hwcap.h> #include <asm/csr.h> #include <asm/asm.h> +#include <asm/errata_list.h> + +#ifdef CONFIG_ERRATA_THEAD_VECTOR + +static inline unsigned long riscv_sr_vs(void) +{ + u32 val; + + ALT_SR_VS(val, SR_VS); + return val; +} + +static inline unsigned long riscv_sr_vs_initial(void) +{ + u32 val; + + ALT_SR_VS(val, SR_VS_INITIAL); + return val; +} + +static inline unsigned long riscv_sr_vs_clean(void) +{ + u32 val; + + ALT_SR_VS(val, SR_VS_CLEAN); + return val; +} + +static inline unsigned long riscv_sr_vs_dirty(void) +{ + u32 val; + + ALT_SR_VS(val, SR_VS_DIRTY); + return val; +} + +#define SR_VS riscv_sr_vs() +#define SR_VS_INITIAL riscv_sr_vs_initial() +#define SR_VS_CLEAN riscv_sr_vs_clean() +#define SR_VS_DIRTY riscv_sr_vs_dirty() + +#else /* CONFIG_ERRATA_THEAD_VECTOR */ + +#define SR_VS SR_VS_1_0 +#define SR_VS_INITIAL 
SR_VS_INITIAL_1_0 +#define SR_VS_CLEAN SR_VS_CLEAN_1_0 +#define SR_VS_DIRTY SR_VS_DIRTY_1_0 + +#endif /* CONFIG_ERRATA_THEAD_VECTOR */ extern bool riscv_v_supported; void riscv_vector_supported(void); @@ -63,26 +112,74 @@ static __always_inline void riscv_v_disable(void) static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) { - asm volatile ( + register u32 t1 asm("t1") = (SR_FS); + + /* + * CSR_VCSR is defined as + * [2:1] - vxrm[1:0] + * [0] - vxsat + * The earlier vector spec implemented by T-Head uses separate + * registers for the same bit-elements, so just combine those + * into the existing output field. + * + * Additionally T-Head cores need FS to be enabled when accessing + * the VXRM and VXSAT CSRs, otherwise ending in illegal instructions. + * Though the cores do not implement the VXRM and VXSAT fields in the + * FCSR CSR that vector-0.7.1 specifies. + */ + asm volatile (ALTERNATIVE( "csrr %0, " __stringify(CSR_VSTART) "\n\t" "csrr %1, " __stringify(CSR_VTYPE) "\n\t" "csrr %2, " __stringify(CSR_VL) "\n\t" "csrr %3, " __stringify(CSR_VCSR) "\n\t" + __nops(5), + "csrs sstatus, t1\n\t" + "csrr %0, " __stringify(CSR_VSTART) "\n\t" + "csrr %1, " __stringify(CSR_VTYPE) "\n\t" + "csrr %2, " __stringify(CSR_VL) "\n\t" + "csrr %3, " __stringify(THEAD_C9XX_CSR_VXRM) "\n\t" + "slliw %3, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" + "csrr t4, " __stringify(THEAD_C9XX_CSR_VXSAT) "\n\t" + "or %3, %3, t4\n\t" + "csrc sstatus, t1\n\t", + THEAD_VENDOR_ID, + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), - "=r" (dest->vcsr) : :); + "=r" (dest->vcsr) : "r"(t1) : "t4"); } static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) { - asm volatile ( + register u32 t1 asm("t1") = (SR_FS); + + /* + * Similar to __vstate_csr_save above, restore values for the + * separate VXRM and VXSAT CSRs from the vcsr variable. 
+ */ + asm volatile (ALTERNATIVE( ".option push\n\t" ".option arch, +v\n\t" "vsetvl x0, %2, %1\n\t" ".option pop\n\t" "csrw " __stringify(CSR_VSTART) ", %0\n\t" "csrw " __stringify(CSR_VCSR) ", %3\n\t" + __nops(6), + "csrs sstatus, t1\n\t" + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvl x0, %2, %1\n\t" + ".option pop\n\t" + "csrw " __stringify(CSR_VSTART) ", %0\n\t" + "srliw t4, %3, " __stringify(VCSR_VXRM_SHIFT) "\n\t" + "andi t4, t4, " __stringify(VCSR_VXRM_MASK) "\n\t" + "csrw " __stringify(THEAD_C9XX_CSR_VXRM) ", t4\n\t" + "andi %3, %3, " __stringify(VCSR_VXSAT_MASK) "\n\t" + "csrw " __stringify(THEAD_C9XX_CSR_VXSAT) ", %3\n\t" + "csrc sstatus, t1\n\t", + THEAD_VENDOR_ID, + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) : : "r" (src->vstart), "r" (src->vtype), "r" (src->vl), - "r" (src->vcsr) :); + "r" (src->vcsr), "r"(t1) : "t4"); } static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, @@ -92,7 +189,8 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, riscv_v_enable(); __vstate_csr_save(save_to); - asm volatile ( + asm volatile (ALTERNATIVE( + "nop\n\t" ".option push\n\t" ".option arch, +v\n\t" "vsetvli %0, x0, e8, m8, ta, ma\n\t" @@ -103,8 +201,18 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to, "vse8.v v16, (%1)\n\t" "add %1, %1, %0\n\t" "vse8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl) : "r" (datap) : "memory"); + ".option pop\n\t", + "mv t0, %1\n\t" + THEAD_VSETVLI_T4X0E8M8D1 + THEAD_VSB_V_V0T0 + "addi t0, t0, 128\n\t" + THEAD_VSB_V_V8T0 + "addi t0, t0, 128\n\t" + THEAD_VSB_V_V16T0 + "addi t0, t0, 128\n\t" + THEAD_VSB_V_V24T0, THEAD_VENDOR_ID, + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) + : "=&r" (vl) : "r" (datap) : "t0", "t4", "memory"); riscv_v_disable(); } @@ -114,7 +222,8 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ unsigned long vl; riscv_v_enable(); - asm volatile ( + asm volatile (ALTERNATIVE( + 
"nop\n\t" ".option push\n\t" ".option arch, +v\n\t" "vsetvli %0, x0, e8, m8, ta, ma\n\t" @@ -125,8 +234,18 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_ "vle8.v v16, (%1)\n\t" "add %1, %1, %0\n\t" "vle8.v v24, (%1)\n\t" - ".option pop\n\t" - : "=&r" (vl) : "r" (datap) : "memory"); + ".option pop\n\t", + "mv t0, %1\n\t" + THEAD_VSETVLI_T4X0E8M8D1 + THEAD_VLB_V_V0T0 + "addi t0, t0, 128\n\t" + THEAD_VLB_V_V8T0 + "addi t0, t0, 128\n\t" + THEAD_VLB_V_V16T0 + "addi t0, t0, 128\n\t" + THEAD_VLB_V_V24T0, THEAD_VENDOR_ID, + ERRATA_THEAD_VECTOR, CONFIG_ERRATA_THEAD_VECTOR) + : "=&r" (vl) : "r" (datap) : "t0", "t4"); __vstate_csr_restore(restore_from); riscv_v_disable(); } diff --git a/arch/riscv/kernel/vector.c b/arch/riscv/kernel/vector.c index 74178fb71805..51726890a4d0 100644 --- a/arch/riscv/kernel/vector.c +++ b/arch/riscv/kernel/vector.c @@ -140,7 +140,7 @@ bool riscv_v_first_use_handler(struct pt_regs *regs) u32 insn = (u32)regs->badaddr; /* Do not handle if V is not supported, or disabled */ - if (!(ELF_HWCAP & COMPAT_HWCAP_ISA_V)) + if (!has_vector()) return false; /* If V has been enabled then it is not the first-use trap */