In the vector unit-stride load/store helper functions, the vext_ldst_us and vext_ldst_whole functions account for most of the execution time. Inlining these functions avoids the function call overhead and improves the helper function performance.
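For reference, below is a minimal standalone C sketch of the idea (not part of the patch; the names ALWAYS_INLINE, ldst_us, lde_w_host and helper_vle32_v_sketch are made up for illustration). Once the generic unit-stride loop is forced inline into the out-of-line helper, the element-access function pointer becomes a compile-time constant, so the compiler can turn the per-element indirect call into a direct, inlinable one. QEMU_ALWAYS_INLINE expands to __attribute__((always_inline)) on GCC/Clang.

#include <stdint.h>

/* Stand-in for QEMU_ALWAYS_INLINE (illustrative only). */
#define ALWAYS_INLINE __attribute__((always_inline))

/* Per-element access callback, analogous to vext_ldst_elem_fn_host. */
typedef void ldst_elem_fn(void *vd, uint32_t idx, void *host);

/* Element op, also forced inline so no call remains in the hot loop. */
static inline ALWAYS_INLINE void
lde_w_host(void *vd, uint32_t idx, void *host)
{
    ((uint32_t *)vd)[idx] = *(uint32_t *)host;
}

/* Generic loop, analogous to vext_ldst_us(): takes the element op as a pointer. */
static inline ALWAYS_INLINE void
ldst_us(void *vd, void *host, uint32_t evl, ldst_elem_fn *ldst_elem,
        uint32_t esz)
{
    for (uint32_t i = 0; i < evl; i++) {
        ldst_elem(vd, i, (uint8_t *)host + i * esz);
    }
}

/* Out-of-line helper: after inlining, the only real call left is this one. */
void helper_vle32_v_sketch(void *vd, void *host, uint32_t evl)
{
    ldst_us(vd, host, evl, lde_w_host, sizeof(uint32_t));
}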
Signed-off-by: Max Chou <max.c...@sifive.com>
---
 target/riscv/vector_helper.c | 64 +++++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 30 deletions(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 09c9b231c3f..4a21064a366 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -408,20 +408,22 @@ typedef void vext_ldst_elem_fn_tlb(CPURISCVState *env, abi_ptr addr,
                                     uint32_t idx, void *vd, uintptr_t retaddr);
 typedef void vext_ldst_elem_fn_host(void *vd, uint32_t idx, void *host);
 
-#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
-static void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
-                       uint32_t byte_off, void *vd, uintptr_t retaddr) \
-{ \
-    uint8_t *reg = ((uint8_t *)vd + byte_off); \
-    ETYPE *cur = ((ETYPE *)reg); \
-    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
-} \
- \
-static void NAME##_host(void *vd, uint32_t byte_off, void *host) \
-{ \
-    ETYPE val = LDSUF##_p(host); \
-    uint8_t *reg = (uint8_t *)(vd + byte_off); \
-    *(ETYPE *)(reg) = val; \
+#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
+static inline QEMU_ALWAYS_INLINE \
+void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
+                uint32_t byte_off, void *vd, uintptr_t retaddr) \
+{ \
+    uint8_t *reg = ((uint8_t *)vd + byte_off); \
+    ETYPE *cur = ((ETYPE *)reg); \
+    *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
+} \
+ \
+static inline QEMU_ALWAYS_INLINE \
+void NAME##_host(void *vd, uint32_t byte_off, void *host) \
+{ \
+    ETYPE val = LDSUF##_p(host); \
+    uint8_t *reg = (uint8_t *)(vd + byte_off); \
+    *(ETYPE *)(reg) = val; \
 }
 
 GEN_VEXT_LD_ELEM(lde_b, uint8_t, H1, ldub)
@@ -429,20 +431,22 @@ GEN_VEXT_LD_ELEM(lde_h, uint16_t, H2, lduw)
 GEN_VEXT_LD_ELEM(lde_w, uint32_t, H4, ldl)
 GEN_VEXT_LD_ELEM(lde_d, uint64_t, H8, ldq)
 
-#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
-static void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
-                       uint32_t byte_off, void *vd, uintptr_t retaddr) \
-{ \
-    uint8_t *reg = ((uint8_t *)vd + byte_off); \
-    ETYPE data = *((ETYPE *)reg); \
-    cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
-} \
- \
-static void NAME##_host(void *vd, uint32_t byte_off, void *host) \
-{ \
-    uint8_t *reg = ((uint8_t *)vd + byte_off); \
-    ETYPE val = *(ETYPE *)(reg); \
-    STSUF##_p(host, val); \
+#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
+static inline QEMU_ALWAYS_INLINE \
+void NAME##_tlb(CPURISCVState *env, abi_ptr addr, \
+                uint32_t byte_off, void *vd, uintptr_t retaddr) \
+{ \
+    uint8_t *reg = ((uint8_t *)vd + byte_off); \
+    ETYPE data = *((ETYPE *)reg); \
+    cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
+} \
+ \
+static inline QEMU_ALWAYS_INLINE \
+void NAME##_host(void *vd, uint32_t byte_off, void *host) \
+{ \
+    uint8_t *reg = ((uint8_t *)vd + byte_off); \
+    ETYPE val = *(ETYPE *)(reg); \
+    STSUF##_p(host, val); \
 }
 
 GEN_VEXT_ST_ELEM(ste_b, uint8_t, H1, stb)
@@ -604,7 +608,7 @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d_tlb)
  */
 
 /* unmasked unit-stride load and store operation */
-static void
+static inline QEMU_ALWAYS_INLINE void
 vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
              vext_ldst_elem_fn_tlb *ldst_tlb,
              vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
@@ -1006,7 +1010,7 @@ GEN_VEXT_LDFF(vle64ff_v, int64_t, lde_d_tlb)
 /*
  * load and store whole register instructions
  */
-static void
+static inline QEMU_ALWAYS_INLINE void
 vext_ldst_whole(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
                 vext_ldst_elem_fn_tlb *ldst_tlb,
                 vext_ldst_elem_fn_host *ldst_host, uint32_t log2_esz,
-- 
2.34.1