Add an bswap16 and bswap32 ops, either using the rev and rev16 instructions on ARMv6+ or shifts and logical operations on previous ARM versions. In both cases the result use less instructions than the pure TCG version.
These ops are also needed by the qemu_ld/st functions. Signed-off-by: Aurelien Jarno <aurel...@aurel32.net> --- tcg/arm/tcg-target.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ tcg/arm/tcg-target.h | 4 ++-- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index d8ba5f1..aec1183 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -503,6 +503,44 @@ static inline void tcg_out_ext16u(TCGContext *s, int cond, #endif } +static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn) +{ +#ifdef USE_ARMV6_INSTRUCTIONS + /* rev16 */ + tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn); +#else + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_R8, 0, rn, SHIFT_IMM_LSL(24)); + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_R8, 0, TCG_REG_R8, SHIFT_IMM_LSR(16)); + tcg_out_dat_reg(s, cond, ARITH_ORR, + rd, TCG_REG_R8, rn, SHIFT_IMM_LSR(8)); +#endif +} + +static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn) +{ +#ifdef USE_ARMV6_INSTRUCTIONS + /* rev */ + tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn); +#else + /* This code only uses one temporary register. There is probably + a faster way to do that with more temporary registers. */ + tcg_out_dat_reg(s, cond, ARITH_MOV, + TCG_REG_R8, 0, rn, SHIFT_IMM_ROR(8)); + tcg_out_dat_imm(s, cond, ARITH_BIC, + rd, TCG_REG_R8, 0xff); + tcg_out_dat_imm(s, cond, ARITH_BIC, + rd, rd, 0xff | 0x800); + tcg_out_dat_imm(s, cond, ARITH_BIC, + TCG_REG_R8, TCG_REG_R8, 0xff | 0x400); + tcg_out_dat_imm(s, cond, ARITH_BIC, + TCG_REG_R8, TCG_REG_R8, 0xff | 0xc00); + tcg_out_dat_reg(s, cond, ARITH_ORR, + rd, rd, TCG_REG_R8, SHIFT_IMM_ROR(16)); +#endif +} + static inline void tcg_out_ld32_12(TCGContext *s, int cond, int rd, int rn, tcg_target_long im) { @@ -1520,6 +1558,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_qemu_st(s, COND_AL, args, 3); break; + case INDEX_op_bswap16_i32: + tcg_out_bswap16(s, COND_AL, args[0], args[1]); + break; + case INDEX_op_bswap32_i32: + tcg_out_bswap32(s, COND_AL, args[0], args[1]); + break; + case INDEX_op_ext8s_i32: tcg_out_ext8s(s, COND_AL, args[0], args[1]); break; @@ -1607,6 +1652,9 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_qemu_st64, { "x", "D", "x", "X" } }, #endif + { INDEX_op_bswap16_i32, { "r", "r" } }, + { INDEX_op_bswap32_i32, { "r", "r" } }, + { INDEX_op_ext8s_i32, { "r", "r" } }, { INDEX_op_ext16s_i32, { "r", "r" } }, { INDEX_op_ext16u_i32, { "r", "r" } }, diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 1f6d665..d8d7d94 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -62,8 +62,8 @@ enum { #define TCG_TARGET_HAS_ext16s_i32 #undef TCG_TARGET_HAS_ext8u_i32 /* and r0, r1, #0xff */ #define TCG_TARGET_HAS_ext16u_i32 -// #define TCG_TARGET_HAS_bswap16_i32 -// #define TCG_TARGET_HAS_bswap32_i32 +#define TCG_TARGET_HAS_bswap16_i32 +#define TCG_TARGET_HAS_bswap32_i32 #define TCG_TARGET_HAS_not_i32 #define TCG_TARGET_HAS_neg_i32 #define TCG_TARGET_HAS_rot_i32 -- 1.7.0.4