The 32-bit immediate OR instructions are part of the extended-immediate
facility.  Use them only when that facility is present.

At the same time, pull the logic to load immediates into registers
into a constraint letter for TCG.

Signed-off-by: Richard Henderson <r...@twiddle.net>
---
 tcg/s390/tcg-target.c |   92 +++++++++++++++++++++++++++++++++++++------------
 1 files changed, 70 insertions(+), 22 deletions(-)

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 359f6d1..36d4ad0 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -38,6 +38,7 @@
 #define TCG_CT_CONST_ADDI 0x0400
 #define TCG_CT_CONST_MULI 0x0800
 #define TCG_CT_CONST_ANDI 0x1000
+#define TCG_CT_CONST_ORI  0x2000
 
 #define TCG_TMP0 TCG_REG_R14
 
@@ -358,6 +359,10 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         ct->ct &= ~TCG_CT_REG;
         ct->ct |= TCG_CT_CONST_ANDI;
         break;
+    case 'O':
+        ct->ct &= ~TCG_CT_REG;
+        ct->ct |= TCG_CT_CONST_ORI;
+        break;
     default:
         break;
     }
@@ -424,6 +429,36 @@ static int tcg_match_andi(int ct, tcg_target_ulong val)
     return 1;
 }
 
+/* Immediates to be used with logical OR.  This is an optimization only,
+   since a full 64-bit immediate OR can always be performed with 4 sequential
+   OI[LH][LH] instructions.  What we're looking for is immediates that we
+   can load efficiently, and the immediate load plus the reg-reg OR is
+   smaller than the sequential OI's.  */
+
+static int tcg_match_ori(int ct, tcg_target_long val)
+{
+    if (facilities & FACILITY_EXT_IMM) {
+        if (ct & TCG_CT_CONST_32) {
+            /* All 32-bit ORs can be performed with 1 48-bit insn.  */
+            return 1;
+        }
+    }
+
+    /* Look for negative values.  These are best to load with LGHI.  */
+    if (val < 0) {
+        if (val == (int16_t)val) {
+            return 0;
+        }
+        if (facilities & FACILITY_EXT_IMM) {
+            if (val == (int32_t)val) {
+                return 0;
+            }
+        }
+    }
+
+    return 1;
+}
+
 /* Test if a constant matches the constraint. */
 static int tcg_target_const_match(tcg_target_long val,
                                   const TCGArgConstraint *arg_ct)
@@ -465,6 +500,8 @@ static int tcg_target_const_match(tcg_target_long val,
         }
     } else if (ct & TCG_CT_CONST_ANDI) {
         return tcg_match_andi(ct, val);
+    } else if (ct & TCG_CT_CONST_ORI) {
+        return tcg_match_ori(ct, val);
     }
 
     return 0;
@@ -907,34 +944,45 @@ static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
     int i;
 
-    /* Zero-th, look for no-op.  */
+    /* Look for no-op.  */
     if (val == 0) {
         return;
     }
 
-    /* First, try all 32-bit insns that can perform it in one go.  */
-    for (i = 0; i < 4; i++) {
-        tcg_target_ulong mask = (0xffffull << i*16);
-        if ((val & mask) != 0 && (val & ~mask) == 0) {
-            tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
-            return;
+    if (facilities & FACILITY_EXT_IMM) {
+        /* Try all 32-bit insns that can perform it in one go.  */
+        for (i = 0; i < 4; i++) {
+            tcg_target_ulong mask = (0xffffull << i*16);
+            if ((val & mask) != 0 && (val & ~mask) == 0) {
+                tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
+                return;
+            }
         }
-    }
 
-    /* Second, try all 48-bit insns that can perform it in one go.  */
-    for (i = 0; i < 2; i++) {
-        tcg_target_ulong mask = (0xffffffffull << i*32);
-        if ((val & mask) != 0 && (val & ~mask) == 0) {
-            tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
-            return;
+        /* Try all 48-bit insns that can perform it in one go.  */
+        for (i = 0; i < 2; i++) {
+            tcg_target_ulong mask = (0xffffffffull << i*32);
+            if ((val & mask) != 0 && (val & ~mask) == 0) {
+                tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+                return;
+            }
         }
-    }
 
-    /* Last, perform the OR via sequential modifications to the
-       high and low parts.  Do this via recursion to handle 16-bit
-       vs 32-bit masks in each half.  */
-    tgen64_ori(s, dest, val & 0x00000000ffffffffull);
-    tgen64_ori(s, dest, val & 0xffffffff00000000ull);
+        /* Perform the OR via sequential modifications to the high and
+           low parts.  Do this via recursion to handle 16-bit vs 32-bit
+           masks in each half.  */
+        tgen64_ori(s, dest, val & 0x00000000ffffffffull);
+        tgen64_ori(s, dest, val & 0xffffffff00000000ull);
+    } else {
+        /* With no extended-immediate facility, we don't need to be so
+           clever.  Just iterate over the insns and mask in the constant.  */
+        for (i = 0; i < 4; i++) {
+            tcg_target_ulong mask = (0xffffull << i*16);
+            if ((val & mask) != 0) {
+                tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
+            }
+        }
+    }
 }
 
 static void tgen64_xori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
@@ -1764,7 +1812,7 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
 
     { INDEX_op_and_i32, { "r", "0", "rWA" } },
-    { INDEX_op_or_i32, { "r", "0", "ri" } },
+    { INDEX_op_or_i32, { "r", "0", "rWO" } },
     { INDEX_op_xor_i32, { "r", "0", "ri" } },
 
     { INDEX_op_neg_i32, { "r", "r" } },
@@ -1825,7 +1873,7 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
 
     { INDEX_op_and_i64, { "r", "0", "rA" } },
-    { INDEX_op_or_i64, { "r", "0", "ri" } },
+    { INDEX_op_or_i64, { "r", "0", "rO" } },
     { INDEX_op_xor_i64, { "r", "0", "ri" } },
 
     { INDEX_op_neg_i64, { "r", "r" } },
-- 
1.7.0.1
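
A small standalone restatement of the policy behind the new 'O' constraint may
help review.  This is a sketch only, not part of the patch: ori_as_immediate,
is_32bit_op and have_ext_imm are invented names standing in for tcg_match_ori,
TCG_CT_CONST_32 and the facilities & FACILITY_EXT_IMM test.  The idea, per the
comment in the patch, is to reject constants that are cheaper to load into a
scratch register (LGHI for 16-bit signed values, or a 32-bit immediate load
when the extended-immediate facility is present) followed by a reg-reg OR than
to build out of two or more OI immediates.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Sketch: which constants should the 'O' constraint accept as OR immediates?
   Mirrors tcg_match_ori above; all names here are illustrative only.  */
static bool ori_as_immediate(int64_t val, bool is_32bit_op, bool have_ext_imm)
{
    if (have_ext_imm && is_32bit_op) {
        /* Any 32-bit OR can be done with one 48-bit OILF.  */
        return true;
    }
    if (val < 0) {
        /* Negative values that fit a sign-extended 16-bit (or, with the
           facility, 32-bit) immediate are better loaded into a register
           and ORed reg-to-reg, so refuse them as immediates.  */
        if (val == (int16_t)val) {
            return false;
        }
        if (have_ext_imm && val == (int32_t)val) {
            return false;
        }
    }
    return true;
}

int main(void)
{
    /* 64-bit operation, extended-immediate facility present.  */
    printf("0x0000ffff  -> %s\n",
           ori_as_immediate(0xffff, false, true) ? "immediate" : "load+OR");
    printf("-2          -> %s\n",
           ori_as_immediate(-2, false, true) ? "immediate" : "load+OR");
    printf("-0x12345678 -> %s\n",
           ori_as_immediate(-0x12345678, false, true) ? "immediate" : "load+OR");
    return 0;
}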
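
The decomposition done by tgen64_ori, likewise, is easier to see outside the
code generator.  The sketch below (again illustrative only, not part of the
patch) mirrors its lane-splitting logic and prints which immediate-OR forms
would be picked for a given 64-bit constant.  show_ori and have_ext_imm are
invented names; OILL/OILH/OIHL/OIHH and OILF/OIHF are the z/Architecture
OR-immediate mnemonics the generator chooses between.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Sketch: report the immediate-OR instructions tgen64_ori would emit
   for 'val'.  have_ext_imm stands in for facilities & FACILITY_EXT_IMM.  */
static void show_ori(uint64_t val, bool have_ext_imm)
{
    static const char *oi[4]  = { "OILL", "OILH", "OIHL", "OIHH" };
    static const char *oif[2] = { "OILF", "OIHF" };
    int i;

    printf("0x%016" PRIx64 ":", val);
    if (val == 0) {
        printf(" no-op\n");
        return;
    }
    if (!have_ext_imm) {
        /* Only the 16-bit OI[LH][LH] forms exist: OR in every
           non-zero 16-bit lane of the constant.  */
        for (i = 0; i < 4; i++) {
            unsigned part = (uint16_t)(val >> (i * 16));
            if (part) {
                printf(" %s 0x%04x", oi[i], part);
            }
        }
        printf("\n");
        return;
    }
    /* With the facility: a constant confined to a single 16-bit or 32-bit
       lane takes one instruction; otherwise split into 32-bit halves and
       handle each recursively, as the patch does.  */
    for (i = 0; i < 4; i++) {
        uint64_t mask = 0xffffull << (i * 16);
        if ((val & mask) != 0 && (val & ~mask) == 0) {
            printf(" %s 0x%04x\n", oi[i], (unsigned)(uint16_t)(val >> (i * 16)));
            return;
        }
    }
    for (i = 0; i < 2; i++) {
        uint64_t mask = 0xffffffffull << (i * 32);
        if ((val & mask) != 0 && (val & ~mask) == 0) {
            printf(" %s 0x%08" PRIx32 "\n", oif[i], (uint32_t)(val >> (i * 32)));
            return;
        }
    }
    printf(" split ->\n");
    show_ori(val & 0x00000000ffffffffull, true);
    show_ori(val & 0xffffffff00000000ull, true);
}

int main(void)
{
    show_ori(0x0000000000ff0000ull, true);   /* one OILH */
    show_ori(0x00000000deadbeefull, true);   /* one OILF */
    show_ori(0x0123000000004567ull, true);   /* split: OILL + OIHH */
    show_ori(0x0123000000004567ull, false);  /* no facility: OILL + OIHH */
    return 0;
}

The last two calls show why the 'O' constraint still accepts wide positive
constants even without the facility: the fallback path simply ORs in each
non-zero 16-bit lane.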