Signed-off-by: Richard Henderson <r...@twiddle.net> --- tcg/s390/tcg-target.c | 119 +++++++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 111 insertions(+), 8 deletions(-)
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index 53a92c5..a17ef91 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -33,10 +33,11 @@ do { } while (0) #endif -#define TCG_CT_CONST_32 0x100 -#define TCG_CT_CONST_NEG 0x200 -#define TCG_CT_CONST_ADDI 0x400 -#define TCG_CT_CONST_ANDI 0x800 +#define TCG_CT_CONST_32 0x0100 +#define TCG_CT_CONST_NEG 0x0200 +#define TCG_CT_CONST_ADDI 0x0400 +#define TCG_CT_CONST_ANDI 0x1000 +#define TCG_CT_CONST_ORI 0x2000 /* Several places within the instruction set 0 means "no register" rather than TCG_REG_R0. */ @@ -64,6 +65,8 @@ typedef enum S390Opcode { RIL_LLILF = 0xc00f, RIL_NIHF = 0xc00a, RIL_NILF = 0xc00b, + RIL_OIHF = 0xc00c, + RIL_OILF = 0xc00d, RI_AGHI = 0xa70b, RI_AHI = 0xa70a, @@ -81,6 +84,10 @@ typedef enum S390Opcode { RI_NIHL = 0xa505, RI_NILH = 0xa506, RI_NILL = 0xa507, + RI_OIHH = 0xa508, + RI_OIHL = 0xa509, + RI_OILH = 0xa50a, + RI_OILL = 0xa50b, RRE_AGR = 0xb908, RRE_CGR = 0xb920, @@ -330,6 +337,10 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) ct->ct &= ~TCG_CT_REG; ct->ct |= TCG_CT_CONST_ANDI; break; + case 'O': + ct->ct &= ~TCG_CT_REG; + ct->ct |= TCG_CT_CONST_ORI; + break; default: break; } @@ -396,6 +407,36 @@ static int tcg_match_andi(int ct, tcg_target_ulong val) return 1; } +/* Immediates to be used with logical OR. This is an optimization only, + since a full 64-bit immediate OR can always be performed with 4 sequential + OI[LH][LH] instructions. What we're looking for is immediates that we + can load efficiently, and the immediate load plus the reg-reg OR is + smaller than the sequential OI's. */ + +static int tcg_match_ori(int ct, tcg_target_long val) +{ + if (facilities & FACILITY_EXT_IMM) { + if (ct & TCG_CT_CONST_32) { + /* All 32-bit ORs can be performed with 1 48-bit insn. */ + return 1; + } + } + + /* Look for negative values. These are best to load with LGHI. 
*/ + if (val < 0) { + if (val == (int16_t)val) { + return 0; + } + if (facilities & FACILITY_EXT_IMM) { + if (val == (int32_t)val) { + return 0; + } + } + } + + return 1; +} + /* Test if a constant matches the constraint. */ static int tcg_target_const_match(tcg_target_long val, const TCGArgConstraint *arg_ct) @@ -427,6 +468,8 @@ static int tcg_target_const_match(tcg_target_long val, } } else if (ct & TCG_CT_CONST_ANDI) { return tcg_match_andi(ct, val); + } else if (ct & TCG_CT_CONST_ORI) { + return tcg_match_ori(ct, val); } return 0; @@ -841,6 +884,58 @@ static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val) } } +static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val) +{ + static const S390Opcode oi_insns[4] = { + RI_OILL, RI_OILH, RI_OIHL, RI_OIHH + }; + static const S390Opcode oif_insns[2] = { + RIL_OILF, RIL_OIHF + }; + + int i; + + /* Look for no-op. */ + if (val == 0) { + return; + } + + if (facilities & FACILITY_EXT_IMM) { + /* Try all 32-bit insns that can perform it in one go. */ + for (i = 0; i < 4; i++) { + tcg_target_ulong mask = (0xffffull << i*16); + if ((val & mask) != 0 && (val & ~mask) == 0) { + tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); + return; + } + } + + /* Try all 48-bit insns that can perform it in one go. */ + for (i = 0; i < 2; i++) { + tcg_target_ulong mask = (0xffffffffull << i*32); + if ((val & mask) != 0 && (val & ~mask) == 0) { + tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32); + return; + } + } + + /* Perform the OR via sequential modifications to the high and + low parts. Do this via recursion to handle 16-bit vs 32-bit + masks in each half. */ + tgen64_ori(s, dest, val & 0x00000000ffffffffull); + tgen64_ori(s, dest, val & 0xffffffff00000000ull); + } else { + /* With no extended-immediate facility, we don't need to be so + clever. Just iterate over the insns and OR in the constant.
*/ + for (i = 0; i < 4; i++) { + tcg_target_ulong mask = (0xffffull << i*16); + if ((val & mask) != 0) { + tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16); + } + } + } +} + static void tgen32_cmp(TCGContext *s, TCGCond c, TCGReg r1, TCGReg r2) { if (c > TCG_COND_GT) { @@ -1328,7 +1423,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; case INDEX_op_or_i32: - tcg_out_insn(s, RR, OR, args[0], args[2]); + if (const_args[2]) { + tgen64_ori(s, args[0], args[2] & 0xffffffff); + } else { + tcg_out_insn(s, RR, OR, args[0], args[2]); + } break; case INDEX_op_xor_i32: tcg_out_insn(s, RR, XR, args[0], args[2]); @@ -1342,7 +1441,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; case INDEX_op_or_i64: - tcg_out_insn(s, RRE, OGR, args[0], args[2]); + if (const_args[2]) { + tgen64_ori(s, args[0], args[2]); + } else { + tcg_out_insn(s, RRE, OGR, args[0], args[2]); + } break; case INDEX_op_xor_i64: tcg_out_insn(s, RRE, XGR, args[0], args[2]); @@ -1606,7 +1709,7 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } }, { INDEX_op_and_i32, { "r", "0", "rWA" } }, - { INDEX_op_or_i32, { "r", "0", "r" } }, + { INDEX_op_or_i32, { "r", "0", "rWO" } }, { INDEX_op_xor_i32, { "r", "0", "r" } }, { INDEX_op_neg_i32, { "r", "r" } }, @@ -1668,7 +1771,7 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } }, { INDEX_op_and_i64, { "r", "0", "rA" } }, - { INDEX_op_or_i64, { "r", "0", "r" } }, + { INDEX_op_or_i64, { "r", "0", "rO" } }, { INDEX_op_xor_i64, { "r", "0", "r" } }, { INDEX_op_neg_i64, { "r", "r" } }, -- 1.7.0.1