Message ID | 20240918171412.150107-8-max.chou@sifive.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Improve the performance of RISC-V vector unit-stride/whole register ld/st instructions | expand |
On 9/18/24 2:14 PM, Max Chou wrote: > In the vector unit-stride load/store helper functions, the vext_ldst_us > & vext_ldst_whole functions account for most of the execution time. > Inlining the functions can avoid the function call overhead to improve the > helper function performance. > > Signed-off-by: Max Chou <max.chou@sifive.com> > Reviewed-by: Richard Henderson <richard.henderson@linaro.org> > --- Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com> > target/riscv/vector_helper.c | 18 +++++++++++------- > 1 file changed, 11 insertions(+), 7 deletions(-) > > diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c > index 654d5e111f3..0d5ed950486 100644 > --- a/target/riscv/vector_helper.c > +++ b/target/riscv/vector_helper.c > @@ -152,14 +152,16 @@ typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr, > typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host); > > #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ > -static void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \ > +static inline QEMU_ALWAYS_INLINE \ > +void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \ > uint32_t idx, void *vd, uintptr_t retaddr) \ > { \ > ETYPE *cur = ((ETYPE *)vd + H(idx)); \ > *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ > } \ > \ > -static void NAME##_host(void *vd, uint32_t idx, void *host) \ > +static inline QEMU_ALWAYS_INLINE \ > +void NAME##_host(void *vd, uint32_t idx, void *host) \ > { \ > ETYPE *cur = ((ETYPE *)vd + H(idx)); \ > *cur = (ETYPE)LDSUF##_p(host); \ > @@ -171,14 +173,16 @@ GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl) > GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq) > > #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ > -static void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \ > +static inline QEMU_ALWAYS_INLINE \ > +void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \ > uint32_t idx, void *vd, uintptr_t retaddr) \ > { \ > ETYPE data = *((ETYPE *)vd + H(idx)); \ > cpu_##STSUF##_data_ra(env, addr, 
data, retaddr); \ > } \ > \ > -static void NAME##_host(void *vd, uint32_t idx, void *host) \ > +static inline QEMU_ALWAYS_INLINE \ > +void NAME##_host(void *vd, uint32_t idx, void *host) \ > { \ > ETYPE data = *((ETYPE *)vd + H(idx)); \ > STSUF##_p(host, data); \ > @@ -317,7 +321,7 @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb) > */ > > /* unmasked unit-stride load and store operation */ > -static void > +static inline QEMU_ALWAYS_INLINE void > vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr, > uint32_t elems, uint32_t nf, uint32_t max_elems, > uint32_t log2_esz, bool is_load, int mmu_index, > @@ -369,7 +373,7 @@ vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr, > } > } > > -static void > +static inline QEMU_ALWAYS_INLINE void > vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, > vext_ldst_elem_fn_tlb *ldst_tlb, > vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, > @@ -756,7 +760,7 @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host) > /* > * load and store whole register instructions > */ > -static void > +static inline QEMU_ALWAYS_INLINE void > vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, > vext_ldst_elem_fn_tlb *ldst_tlb, > vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 654d5e111f3..0d5ed950486 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -152,14 +152,16 @@ typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr, typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host); #define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \ -static void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \ +static inline QEMU_ALWAYS_INLINE \ +void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \ uint32_t idx, void *vd, uintptr_t retaddr) \ { \ ETYPE *cur = ((ETYPE *)vd + H(idx)); \ *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \ } \ \ -static void NAME##_host(void *vd, uint32_t idx, void *host) \ +static inline QEMU_ALWAYS_INLINE \ +void NAME##_host(void *vd, uint32_t idx, void *host) \ { \ ETYPE *cur = ((ETYPE *)vd + H(idx)); \ *cur = (ETYPE)LDSUF##_p(host); \ @@ -171,14 +173,16 @@ GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl) GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq) #define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \ -static void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \ +static inline QEMU_ALWAYS_INLINE \ +void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \ uint32_t idx, void *vd, uintptr_t retaddr) \ { \ ETYPE data = *((ETYPE *)vd + H(idx)); \ cpu_##STSUF##_data_ra(env, addr, data, retaddr); \ } \ \ -static void NAME##_host(void *vd, uint32_t idx, void *host) \ +static inline QEMU_ALWAYS_INLINE \ +void NAME##_host(void *vd, uint32_t idx, void *host) \ { \ ETYPE data = *((ETYPE *)vd + H(idx)); \ STSUF##_p(host, data); \ @@ -317,7 +321,7 @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb) */ /* unmasked unit-stride load and store operation */ -static void +static inline QEMU_ALWAYS_INLINE void vext_page_ldst_us(CPURISCVState *env, void *vd, target_ulong addr, uint32_t elems, uint32_t nf, uint32_t max_elems, uint32_t log2_esz, bool is_load, int mmu_index, @@ -369,7 +373,7 @@ vext_page_ldst_us(CPURISCVState *env, 
void *vd, target_ulong addr, } } -static void +static inline QEMU_ALWAYS_INLINE void vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb, vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz, @@ -756,7 +760,7 @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb, lde_d_host) /* * load and store whole register instructions */ -static void +static inline QEMU_ALWAYS_INLINE void vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc, vext_ldst_elem_fn_tlb *ldst_tlb, vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,