On 2020/3/14 16:25, Richard Henderson wrote:
On 3/14/20 1:14 AM, Richard Henderson wrote:
I think you should have 4 versions of aadd8, for each of the rounding modes,
+RVVCALL(OPIVV2_ENV, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd8)
then use this, or something like it, to define 4 functions containing main
loops, which will get the helper above inlined.
Alternately, a set of inlines, where a (constant) vxrm is passed down from
above.
I am not sure I have understood this correctly. As I understand it, the code should be changed to something like this:
/*
 * Averaging add of two signed bytes, "rnu" variant (round-to-nearest-up
 * in the RISC-V vxrm naming): the bit shifted out of the widened sum is
 * added back to the halved result.
 *
 * env is not used here; it is kept so that all aadd8_* variants share one
 * signature.  The right shift of a negative sum relies on the compiler
 * performing an arithmetic shift.
 */
static inline int8_t aadd8_rnu(CPURISCVState *env, int8_t a, int8_t b)
{
    int16_t sum = (int16_t)(a + b);     /* widened: cannot overflow */
    int16_t half = sum >> 1;

    return (int8_t)(half + (sum & 0x1));
}
/*
 * Averaging add of two signed bytes, "rne" variant (round-to-nearest-even
 * in the RISC-V vxrm naming): the halved sum is incremented only when the
 * two low bits of the widened sum are both set, i.e. the discarded bit is
 * 1 and the truncated result would be odd.
 *
 * env is not used here; it is kept so that all aadd8_* variants share one
 * signature.  The right shift of a negative sum relies on the compiler
 * performing an arithmetic shift.
 */
static inline int8_t aadd8_rne(CPURISCVState *env, int8_t a, int8_t b)
{
    int16_t sum = (int16_t)(a + b);     /* widened: cannot overflow */
    int16_t half = sum >> 1;

    if ((sum & 0x3) == 0x3) {
        half++;
    }
    return (int8_t)half;
}
/*
 * Averaging add of two signed bytes, "rdn" variant (round-down/truncate
 * in the RISC-V vxrm naming): the widened sum is simply shifted right by
 * one, with no rounding increment.
 *
 * env is not used here; it is kept so that all aadd8_* variants share one
 * signature.  The right shift of a negative sum relies on the compiler
 * performing an arithmetic shift.
 */
static inline int8_t aadd8_rdn(CPURISCVState *env, int8_t a, int8_t b)
{
    int16_t sum = (int16_t)(a + b);     /* widened: cannot overflow */

    return (int8_t)(sum >> 1);
}
/*
 * Averaging add of two signed bytes, "rod" variant (round-to-odd in the
 * RISC-V vxrm naming): the halved sum is incremented only when the low
 * two bits of the widened sum are 01, i.e. the discarded bit is 1 and the
 * truncated result would be even.
 *
 * env is not used here; it is kept so that all aadd8_* variants share one
 * signature.  The right shift of a negative sum relies on the compiler
 * performing an arithmetic shift.
 */
static inline int8_t aadd8_rod(CPURISCVState *env, int8_t a, int8_t b)
{
    int16_t sum = (int16_t)(a + b);     /* widened: cannot overflow */
    int16_t half = sum >> 1;

    if ((sum & 0x3) == 0x1) {
        half++;
    }
    return (int8_t)half;
}
/*
 * One vaadd_vv_b instantiation per rounding-mode suffix (rnu, rne, rdn,
 * rod), each bound to the aadd8_* helper with the matching suffix.
 * NOTE(review): RVVCALL/OPIVV2_ENV are defined elsewhere; the exact name
 * of the function each line generates (NAME vs. a prefixed form) should be
 * checked against what GEN_VEXT_VV_ENV references.
 */
RVVCALL(OPIVV2_ENV, vaadd_vv_b_rnu, OP_SSS_B, H1, H1, H1, aadd8_rnu)
RVVCALL(OPIVV2_ENV, vaadd_vv_b_rne, OP_SSS_B, H1, H1, H1, aadd8_rne)
RVVCALL(OPIVV2_ENV, vaadd_vv_b_rdn, OP_SSS_B, H1, H1, H1, aadd8_rdn)
RVVCALL(OPIVV2_ENV, vaadd_vv_b_rod, OP_SSS_B, H1, H1, H1, aadd8_rod)
/*
 * Common element loop for vector-vector helpers whose per-element
 * operation also takes env.
 *
 * vd, vs1, vs2: destination and source register pointers, passed through
 *               to fn unchanged.
 * v0:           mask register, consulted via vext_elem_mask() when the vm
 *               field of desc is zero.
 * env:          CPU state; supplies vl and is forwarded to fn.
 * desc:         descriptor decoded with vext_maxsz/vext_mlen/vext_vm.
 * esz, dsz:     element sizes used to derive vlmax and the byte range
 *               handed to clearfn.
 * fn:           per-element operation, called for every active index
 *               below vl.
 * clearfn:      tail-clear callback, called once after the loop when at
 *               least one index was visited (vl != 0).
 */
void do_vext_vv_env(void *vd, void *v0, void *vs1,
                    void *vs2, CPURISCVState *env, uint32_t desc,
                    uint32_t esz, uint32_t dsz,
                    opivv2_fn *fn, clear_fn *clearfn)
{
    uint32_t vlmax = vext_maxsz(desc) / esz;
    uint32_t mlen = vext_mlen(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = 0; i < vl; i++) {
        /* Skip elements that are masked off. */
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i, env);
    }
    if (i != 0) {
        /* Invoke the callback parameter, not the clear_fn type name. */
        clearfn(vd, vl, vl * dsz, vlmax * dsz);
    }
}
/*
 * Define HELPER(NAME) as a thin wrapper that picks one of four
 * per-rounding-mode element functions (NAME_rnu, NAME_rne, NAME_rdn,
 * NAME_rod) by indexing a table with env->vxrm, then runs the shared loop
 * in do_vext_vv_env() with the given element sizes and clear function.
 *
 * NOTE(review): assumes env->vxrm is always in the range 0..3; confirm at
 * the point where vxrm is written.
 * NOTE(review): the table entries must match the function names actually
 * produced by the RVVCALL instantiations; confirm against that macro.
 */
#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ, CLEAR_FN)             \
void HELPER(NAME)(void *vd, void *v0, void *vs1,              \
                  void *vs2, CPURISCVState *env,              \
                  uint32_t desc)                              \
{                                                             \
    static opivv2_fn * const fns[4] = {                       \
        NAME##_rnu, NAME##_rne,                               \
        NAME##_rdn, NAME##_rod                                \
    };                                                        \
    do_vext_vv_env(vd, v0, vs1, vs2, env, desc,               \
                   ESZ, DSZ, fns[env->vxrm],                  \
                   CLEAR_FN);                                 \
}
Is that what you meant?
Zhiwei
Then use a final outermost wrapper to select one of the 4 functions based on
env->vxrm.
The outermost wrapper could look like
/*
 * Dispatch once on the runtime value of env->vxrm; every arm passes a
 * literal constant as the last argument (somefunc/some/args are
 * placeholders).
 */
switch (env->vxrm) {
case 0: somefunc(some, args, 0); break;
case 1: somefunc(some, args, 1); break;
case 2: somefunc(some, args, 2); break;
default: somefunc(some, args, 3); break;
}
so that somefunc (and its subroutines) are expanded with a constant, and we
switch on that constant at the outermost level.
r~