Richard Henderson <richard.hender...@linaro.org> writes: > This will eventually simplify front-end usage, and will allow > backends to unset TCG_TARGET_HAS_MEMORY_BSWAP without loss of > optimization. > > The argument is added during expansion, not currently exposed > to the front end translators. Non-zero values are not yet > supported by any backends. > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org> > --- > include/tcg/tcg-opc.h | 10 +++++----- > include/tcg/tcg.h | 12 ++++++++++++ > tcg/tcg-op.c | 13 ++++++++----- > tcg/README | 18 ++++++++++-------- > 4 files changed, 35 insertions(+), 18 deletions(-) > > diff --git a/include/tcg/tcg-opc.h b/include/tcg/tcg-opc.h > index bbb0884af8..fddcc42cbd 100644 > --- a/include/tcg/tcg-opc.h > +++ b/include/tcg/tcg-opc.h > @@ -96,8 +96,8 @@ DEF(ext8s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8s_i32)) > DEF(ext16s_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16s_i32)) > DEF(ext8u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext8u_i32)) > DEF(ext16u_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ext16u_i32)) > -DEF(bswap16_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap16_i32)) > -DEF(bswap32_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_bswap32_i32)) > +DEF(bswap16_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap16_i32)) > +DEF(bswap32_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_bswap32_i32)) > DEF(not_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_not_i32)) > DEF(neg_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_neg_i32)) > DEF(andc_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_andc_i32)) > @@ -165,9 +165,9 @@ DEF(ext32s_i64, 1, 1, 0, IMPL64 | > IMPL(TCG_TARGET_HAS_ext32s_i64)) > DEF(ext8u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8u_i64)) > DEF(ext16u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16u_i64)) > DEF(ext32u_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32u_i64)) > -DEF(bswap16_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64)) > -DEF(bswap32_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64)) > -DEF(bswap64_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64)) > +DEF(bswap16_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap16_i64)) > +DEF(bswap32_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap32_i64)) > +DEF(bswap64_i64, 1, 1, 1, IMPL64 | IMPL(TCG_TARGET_HAS_bswap64_i64)) > DEF(not_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_not_i64)) > DEF(neg_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_neg_i64)) > DEF(andc_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_andc_i64)) > diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h > index 064dab383b..7a060e532d 100644 > --- a/include/tcg/tcg.h > +++ b/include/tcg/tcg.h > @@ -430,6 +430,18 @@ typedef enum { > TCG_COND_GTU = 8 | 4 | 0 | 1, > } TCGCond; > > +/* > + * Flags for the bswap opcodes. > + * If IZ, the input is zero-extended, otherwise unknown. > + * If OZ or OS, the output is zero- or sign-extended respectively, > + * otherwise the high bits are undefined. > + */ > +enum { > + TCG_BSWAP_IZ = 1, > + TCG_BSWAP_OZ = 2, > + TCG_BSWAP_OS = 4, > +}; > +
So is a TCG_BSWAP_IZ only really for cases where we have loaded up a narrower width value into the "natural" TCG sized register? We seem to assume this is always the case even though the TCG bswap op doesn't have visibility of how the arg value was loaded. > /* Invert the sense of the comparison. */ > static inline TCGCond tcg_invert_cond(TCGCond c) > { > diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c > index dcc2ed0bbc..dc65577e2f 100644 > --- a/tcg/tcg-op.c > +++ b/tcg/tcg-op.c > @@ -1005,7 +1005,8 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg) > void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg) > { > if (TCG_TARGET_HAS_bswap16_i32) { > - tcg_gen_op2_i32(INDEX_op_bswap16_i32, ret, arg); > + tcg_gen_op3i_i32(INDEX_op_bswap16_i32, ret, arg, > + TCG_BSWAP_IZ | TCG_BSWAP_OZ); > } else { > TCGv_i32 t0 = tcg_temp_new_i32(); > > @@ -1020,7 +1021,7 @@ void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg) > void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) > { > if (TCG_TARGET_HAS_bswap32_i32) { > - tcg_gen_op2_i32(INDEX_op_bswap32_i32, ret, arg); > + tcg_gen_op3i_i32(INDEX_op_bswap32_i32, ret, arg, 0); > } else { > TCGv_i32 t0 = tcg_temp_new_i32(); > TCGv_i32 t1 = tcg_temp_new_i32(); > @@ -1661,7 +1662,8 @@ void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg) > tcg_gen_bswap16_i32(TCGV_LOW(ret), TCGV_LOW(arg)); > tcg_gen_movi_i32(TCGV_HIGH(ret), 0); > } else if (TCG_TARGET_HAS_bswap16_i64) { > - tcg_gen_op2_i64(INDEX_op_bswap16_i64, ret, arg); > + tcg_gen_op3i_i64(INDEX_op_bswap16_i64, ret, arg, > + TCG_BSWAP_IZ | TCG_BSWAP_OZ); > } else { > TCGv_i64 t0 = tcg_temp_new_i64(); > > @@ -1680,7 +1682,8 @@ void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg) > tcg_gen_bswap32_i32(TCGV_LOW(ret), TCGV_LOW(arg)); > tcg_gen_movi_i32(TCGV_HIGH(ret), 0); > } else if (TCG_TARGET_HAS_bswap32_i64) { > - tcg_gen_op2_i64(INDEX_op_bswap32_i64, ret, arg); > + tcg_gen_op3i_i64(INDEX_op_bswap32_i64, ret, arg, > + TCG_BSWAP_IZ | TCG_BSWAP_OZ); > } else { > TCGv_i64 t0 = tcg_temp_new_i64(); > TCGv_i64 t1 = tcg_temp_new_i64(); > @@ -1717,7 +1720,7 @@ void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg) > tcg_temp_free_i32(t0); > tcg_temp_free_i32(t1); > } else if (TCG_TARGET_HAS_bswap64_i64) { > - tcg_gen_op2_i64(INDEX_op_bswap64_i64, ret, arg); > + tcg_gen_op3i_i64(INDEX_op_bswap64_i64, ret, arg, 0); > } else { > TCGv_i64 t0 = tcg_temp_new_i64(); > TCGv_i64 t1 = tcg_temp_new_i64(); > diff --git a/tcg/README b/tcg/README > index 8510d823e3..19fbf6ca52 100644 > --- a/tcg/README > +++ b/tcg/README > @@ -295,19 +295,21 @@ ext32u_i64 t0, t1 > > 8, 16 or 32 bit sign/zero extension (both operands must have the same type) > > -* bswap16_i32/i64 t0, t1 > +* bswap16_i32/i64 t0, t1, flags > > -16 bit byte swap on a 32/64 bit value. It assumes that the two/six high order > -bytes are set to zero. > +16 bit byte swap on a 32/64 bit value. The flags values control how > +the input and output sign- or zero-extension is treated. > > -* bswap32_i32/i64 t0, t1 > +* bswap32_i32/i64 t0, t1, flags > > -32 bit byte swap on a 32/64 bit value. With a 64 bit value, it assumes that > -the four high order bytes are set to zero. > +32 bit byte swap on a 32/64 bit value. For 32-bit value, the flags > +are ignored; for a 64-bit value the flags values control how the > +input and output sign- or zero-extension is treated. > > -* bswap64_i64 t0, t1 > +* bswap64_i64 t0, t1, flags > > -64 bit byte swap > +64 bit byte swap. The flags are ignored -- the argument is present > +for consistency with the smaller bswaps. > > * discard_i32/i64 t0 -- Alex Bennée