Add 32-bit and 64-bit mulu2 and muls2 TCG ops. On IA64, 32-bit ops should just ignore the 32 most significant bits of registers, and can leave them with non-zero values. This means registers should be zero/sign extended before doing the actual multiplication. This leaves some slots in the bundle free to possibly load a constant.
Signed-off-by: Aurelien Jarno <aurel...@aurel32.net> --- tcg/ia64/tcg-target.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++ tcg/ia64/tcg-target.h | 8 ++--- 2 files changed, 97 insertions(+), 4 deletions(-) diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 2373d9e..a46234d 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -278,6 +278,8 @@ enum { OPC_SUB_A3 = 0x10128000000ull, OPC_UNPACK4_L_I2 = 0x0f860000000ull, OPC_XMA_L_F2 = 0x1d000000000ull, + OPC_XMA_H_F2 = 0x1dc00000000ull, + OPC_XMA_HU_F2 = 0x1d800000000ull, OPC_XOR_A1 = 0x10078000000ull, OPC_ZXT1_I29 = 0x00080000000ull, OPC_ZXT2_I29 = 0x00088000000ull, @@ -1098,6 +1100,79 @@ static inline void tcg_out_mul(TCGContext *s, TCGArg ret, tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); } +static inline void tcg_out_mul2_i32(TCGContext *s, TCGArg retl, TCGArg reth, + TCGArg arg1, int const_arg1, + TCGArg arg2, int const_arg2, + int is_signed) +{ + uint64_t opc2, opc3; + + if (const_arg1 && arg1 != 0) { + opc2 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, + TCG_REG_R2, arg1, TCG_REG_R0); + } else if (is_signed) { + opc2 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R2, arg1); + } else { + opc2 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R2, arg1); + } + if (const_arg2 && arg2 != 0) { + opc3 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5, + TCG_REG_R3, arg2, TCG_REG_R0); + } else if (is_signed) { + opc3 = tcg_opc_i29(TCG_REG_P0, OPC_SXT4_I29, TCG_REG_R3, arg2); + } else { + opc3 = tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R3, arg2); + } + + tcg_out_bundle(s, miI, + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + opc2, + opc3); + tcg_out_bundle(s, mmI, + tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, + TCG_REG_F6, TCG_REG_R2), + tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, + TCG_REG_F7, TCG_REG_R3), + tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + tcg_out_bundle(s, mmF, + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, 
TCG_REG_F6, + TCG_REG_F7, TCG_REG_F0)); + tcg_out_bundle(s, MmI, + tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, retl, TCG_REG_F6), + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, reth, retl, 32, 31)); +} + +static inline void tcg_out_mul2_i64(TCGContext *s, TCGArg retl, + TCGArg reth, TCGArg arg1, TCGArg arg2, + int is_signed) +{ + uint64_t opc_xma_h_f2; + + opc_xma_h_f2 = is_signed ? OPC_XMA_H_F2 : OPC_XMA_HU_F2; + + tcg_out_bundle(s, mmI, + tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F8, arg1), + tcg_opc_m18(TCG_REG_P0, OPC_SETF_SIG_M18, TCG_REG_F9, arg2), + tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); + tcg_out_bundle(s, mmf, + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_f2 (TCG_REG_P0, OPC_XMA_L_F2, TCG_REG_F6, TCG_REG_F8, + TCG_REG_F9, TCG_REG_F0)); + tcg_out_bundle(s, mmF, + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0), + tcg_opc_f2 (TCG_REG_P0, opc_xma_h_f2, TCG_REG_F7, TCG_REG_F8, + TCG_REG_F9, TCG_REG_F0)); + tcg_out_bundle(s, mmI, + tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, retl, TCG_REG_F6), + tcg_opc_m19(TCG_REG_P0, OPC_GETF_SIG_M19, reth, TCG_REG_F7), + tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0)); +} + static inline void tcg_out_sar_i32(TCGContext *s, TCGArg ret, TCGArg arg1, TCGArg arg2, int const_arg2) { @@ -2107,6 +2182,20 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mul_i64: tcg_out_mul(s, args[0], args[1], args[2]); break; + case INDEX_op_mulu2_i32: + tcg_out_mul2_i32(s, args[0], args[1], args[2], const_args[2], + args[3], const_args[3], 0); + break; + case INDEX_op_mulu2_i64: + tcg_out_mul2_i64(s, args[0], args[1], args[2], args[3], 0); + break; + case INDEX_op_muls2_i32: + tcg_out_mul2_i32(s, args[0], args[1], args[2], const_args[2], + args[3], const_args[3], 1); + break; + case INDEX_op_muls2_i64: + tcg_out_mul2_i64(s, args[0], args[1], args[2], args[3], 1); + break; case INDEX_op_sar_i32: 
tcg_out_sar_i32(s, args[0], args[1], args[2], const_args[2]); @@ -2275,6 +2364,8 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_xor_i32, { "r", "rI", "rI" } }, { INDEX_op_mul_i32, { "r", "rZ", "rZ" } }, + { INDEX_op_mulu2_i32, { "r", "r", "rI", "rI" } }, + { INDEX_op_muls2_i32, { "r", "r", "rI", "rI" } }, { INDEX_op_sar_i32, { "r", "rZ", "ri" } }, { INDEX_op_shl_i32, { "r", "rZ", "ri" } }, @@ -2322,6 +2413,8 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_xor_i64, { "r", "rI", "rI" } }, { INDEX_op_mul_i64, { "r", "rZ", "rZ" } }, + { INDEX_op_mulu2_i64, { "r", "r", "rZ", "rZ" } }, + { INDEX_op_muls2_i64, { "r", "r", "rZ", "rZ" } }, { INDEX_op_sar_i64, { "r", "rZ", "ri" } }, { INDEX_op_shl_i64, { "r", "rZ", "ri" } }, diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index e3d72ea..75e357e 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -140,10 +140,10 @@ typedef enum { #define TCG_TARGET_HAS_add2_i64 0 #define TCG_TARGET_HAS_sub2_i32 0 #define TCG_TARGET_HAS_sub2_i64 0 -#define TCG_TARGET_HAS_mulu2_i32 0 -#define TCG_TARGET_HAS_mulu2_i64 0 -#define TCG_TARGET_HAS_muls2_i32 0 -#define TCG_TARGET_HAS_muls2_i64 0 +#define TCG_TARGET_HAS_mulu2_i32 1 +#define TCG_TARGET_HAS_mulu2_i64 1 +#define TCG_TARGET_HAS_muls2_i32 1 +#define TCG_TARGET_HAS_muls2_i64 1 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) -- 1.7.10.4