On 27/05/2015 14:29, Yongbok Kim wrote: > MIPS SIMD Architecture vector loads and stores require misalignment support. > MSA Memory access should work as an atomic operation. Therefore, it has to > check validity of all addresses for a vector store access if it is spanning > into two pages. > > Separating helper functions for each data format as format is known in > translation. > To use mmu_idx from cpu_mmu_index() instead of calculating it from hflag. > Removing save_cpu_state() call in translation because it is able to use > cpu_restore_state() on fault as GETRA() is passed. > > Signed-off-by: Yongbok Kim <yongbok....@imgtec.com> > --- > target-mips/helper.h | 10 +++- > target-mips/op_helper.c | 135 +++++++++++++++++++++++++--------------------- > target-mips/translate.c | 27 ++++++---- > 3 files changed, 98 insertions(+), 74 deletions(-) > > diff --git a/target-mips/helper.h b/target-mips/helper.h > index 3bd0b02..bdd5ba5 100644 > --- a/target-mips/helper.h > +++ b/target-mips/helper.h > @@ -931,5 +931,11 @@ DEF_HELPER_4(msa_ftint_u_df, void, env, i32, i32, i32) > DEF_HELPER_4(msa_ffint_s_df, void, env, i32, i32, i32) > DEF_HELPER_4(msa_ffint_u_df, void, env, i32, i32, i32) > > -DEF_HELPER_5(msa_ld_df, void, env, i32, i32, i32, s32) > -DEF_HELPER_5(msa_st_df, void, env, i32, i32, i32, s32) > +#define MSALDST_PROTO(type) \ > +DEF_HELPER_3(msa_ld_ ## type, void, env, i32, tl) \ > +DEF_HELPER_3(msa_st_ ## type, void, env, i32, tl) > +MSALDST_PROTO(b) > +MSALDST_PROTO(h) > +MSALDST_PROTO(w) > +MSALDST_PROTO(d) > +#undef MSALDST_PROTO > diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c > index 73a8e45..f44b9bb 100644 > --- a/target-mips/op_helper.c > +++ b/target-mips/op_helper.c > @@ -3558,72 +3558,83 @@ FOP_CONDN_S(sne, (float32_lt(fst1, fst0, > &env->active_fpu.fp_status) > /* Element-by-element access macros */ > #define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df)) > > -void helper_msa_ld_df(CPUMIPSState *env, uint32_t df, uint32_t wd, uint32_t > rs, > - int32_t s10) > -{ > - wr_t *pwd = &(env->active_fpu.fpr[wd].wr); > - target_ulong addr = env->active_tc.gpr[rs] + (s10 << df); > - int i; > +#if !defined(CONFIG_USER_ONLY) > +#define MEMOP_IDX(DF) \ > + TCGMemOpIdx oi = make_memop_idx(MO_TE | DF | MO_UNALN, \ > + cpu_mmu_index(env)); > +#else > +#define MEMOP_IDX(DF) > +#endif > > - switch (df) { > - case DF_BYTE: > - for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) { > - pwd->b[i] = do_lbu(env, addr + (i << DF_BYTE), > - env->hflags & MIPS_HFLAG_KSU); > - } > - break; > - case DF_HALF: > - for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) { > - pwd->h[i] = do_lhu(env, addr + (i << DF_HALF), > - env->hflags & MIPS_HFLAG_KSU); > - } > - break; > - case DF_WORD: > - for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) { > - pwd->w[i] = do_lw(env, addr + (i << DF_WORD), > - env->hflags & MIPS_HFLAG_KSU); > - } > - break; > - case DF_DOUBLE: > - for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { > - pwd->d[i] = do_ld(env, addr + (i << DF_DOUBLE), > - env->hflags & MIPS_HFLAG_KSU); > - } > - break; > - } > +#define MSA_LD_DF(DF, TYPE, LD_INSN, ...) \ > +void helper_msa_ld_ ## TYPE(CPUMIPSState *env, uint32_t wd, \ > + target_ulong addr) \ > +{ \ > + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); \ > + wr_t wx; \ > + int i; \ > + MEMOP_IDX(DF) \ > + for (i = 0; i < DF_ELEMENTS(DF); i++) { \ > + wx.TYPE[i] = LD_INSN(env, addr + (i << DF), ##__VA_ARGS__); \ > + } \ > + memcpy(pwd, &wx, sizeof(wr_t)); \ > } > > -void helper_msa_st_df(CPUMIPSState *env, uint32_t df, uint32_t wd, uint32_t > rs, > - int32_t s10) > +#if !defined(CONFIG_USER_ONLY) > +MSA_LD_DF(DF_BYTE, b, helper_ret_ldub_mmu, oi, GETRA()) > +MSA_LD_DF(DF_HALF, h, helper_ret_lduw_mmu, oi, GETRA()) > +MSA_LD_DF(DF_WORD, w, helper_ret_ldul_mmu, oi, GETRA()) > +MSA_LD_DF(DF_DOUBLE, d, helper_ret_ldq_mmu, oi, GETRA()) > +#else > +MSA_LD_DF(DF_BYTE, b, cpu_ldub_data) > +MSA_LD_DF(DF_HALF, h, cpu_lduw_data) > +MSA_LD_DF(DF_WORD, w, cpu_ldl_data) > +MSA_LD_DF(DF_DOUBLE, d, cpu_ldq_data) > +#endif > + > +static inline void ensure_writable_pages(CPUMIPSState *env, > + target_ulong addr, > + int mmu_idx, > + uintptr_t retaddr) > { > - wr_t *pwd = &(env->active_fpu.fpr[wd].wr); > - target_ulong addr = env->active_tc.gpr[rs] + (s10 << df); > - int i; > +#if !defined(CONFIG_USER_ONLY) > +#define MSA_PAGESPAN(x) (unlikely((((x) & ~TARGET_PAGE_MASK) \ > + + MSA_WRLEN/8 - 1) >= TARGET_PAGE_SIZE)) > + target_ulong page_addr;
nit: I find the beginning of this simple function somewhat hard to read. How about moving this macro definition outside the body? > > - switch (df) { > - case DF_BYTE: > - for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) { > - do_sb(env, addr + (i << DF_BYTE), pwd->b[i], > - env->hflags & MIPS_HFLAG_KSU); > - } > - break; > - case DF_HALF: > - for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) { > - do_sh(env, addr + (i << DF_HALF), pwd->h[i], > - env->hflags & MIPS_HFLAG_KSU); > - } > - break; > - case DF_WORD: > - for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) { > - do_sw(env, addr + (i << DF_WORD), pwd->w[i], > - env->hflags & MIPS_HFLAG_KSU); > - } > - break; > - case DF_DOUBLE: > - for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { > - do_sd(env, addr + (i << DF_DOUBLE), pwd->d[i], > - env->hflags & MIPS_HFLAG_KSU); > - } > - break; > + if (MSA_PAGESPAN(addr)) { nit: Wouldn't "unlikely()" fit better here rather than hiding it in MSA_PAGESPAN()? > + /* first page */ > + probe_write(env, addr, mmu_idx, retaddr); > + /* second page */ > + page_addr = (addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE; > + probe_write(env, page_addr, mmu_idx, retaddr); > } > +#endif > +} > + > +#define MSA_ST_DF(DF, TYPE, ST_INSN, ...) \ > +void helper_msa_st_ ## TYPE(CPUMIPSState *env, uint32_t wd, \ > + target_ulong addr) \ > +{ \ > + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); \ > + int mmu_idx = cpu_mmu_index(env); \ > + int i; \ > + MEMOP_IDX(DF) \ > + ensure_writable_pages(env, addr, mmu_idx, GETRA()); \ > + for (i = 0; i < DF_ELEMENTS(DF); i++) { \ > + ST_INSN(env, addr + (i << DF), pwd->TYPE[i], ##__VA_ARGS__); \ > + } \ > } > + > +#if !defined(CONFIG_USER_ONLY) > +MSA_ST_DF(DF_BYTE, b, helper_ret_stb_mmu, oi, GETRA()) > +MSA_ST_DF(DF_HALF, h, helper_ret_stw_mmu, oi, GETRA()) > +MSA_ST_DF(DF_WORD, w, helper_ret_stl_mmu, oi, GETRA()) > +MSA_ST_DF(DF_DOUBLE, d, helper_ret_stq_mmu, oi, GETRA()) > +#else > +MSA_ST_DF(DF_BYTE, b, cpu_stb_data) > +MSA_ST_DF(DF_HALF, h, cpu_stw_data) > +MSA_ST_DF(DF_WORD, w, cpu_stl_data) > +MSA_ST_DF(DF_DOUBLE, d, cpu_stq_data) > +#endif > + When I applied this patch, git complained: /home/lea/wrk/qemu/.git/rebase-apply/patch:174: new blank line at EOF. + warning: 1 line adds whitespace errors. Generally I'm wondering if this code would look better if helper_msa_ld_* and helper_msa_st_* definitions weren't common for linux-user and system. Yes, we would duplicate some bits, but we would avoid things like ##__VA_ARGS__, empty MEMOP_IDX(DF) and empty ensure_writable_pages(). It's up to you. Thanks, Leon