Signed-off-by: Richard Henderson <r...@twiddle.net>
---
 tcg/s390/tcg-target.c |  179 +++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 166 insertions(+), 13 deletions(-)
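Not part of the patch, just a note ahead of the diff: the new 'A' constraint is a
size heuristic. The constant is kept as an AND-immediate when a zero-extension or
a single NI insn can do the AND, handed back for a register (constant load plus
reg-reg NGR) when the constant itself fits a single LLI[LH][LH] or LLI[LH]F load,
and otherwise accepted as an immediate NI/NIF sequence. The standalone sketch
below mirrors that decision for 64-bit operands with the extended-immediate
facility present; the helper name andi_is_preferred and the sample masks are made
up for illustration and do not appear in the patch.

#include <stdio.h>
#include <stdint.h>

/* Mirror of the tcg_match_andi decision for a 64-bit AND, assuming
   FACILITY_EXT_IMM.  Returns 1 to use the constant as an immediate,
   0 to load it into a temporary and use NGR instead.  */
static int andi_is_preferred(uint64_t val)
{
    int i;

    /* Zero-extensions have dedicated insns.  */
    if (val == 0xff || val == 0xffff || val == 0xffffffff) {
        return 1;
    }
    /* Only one 16-bit chunk has bits to clear: a single NI[LH][LH] does it.  */
    for (i = 0; i < 4; i++) {
        uint64_t mask = ~(0xffffull << i * 16);
        if ((val & mask) == mask) {
            return 1;
        }
    }
    /* The constant itself fits one 16-bit chunk: one LLI load plus NGR
       beats the three NIs needed to clear the other chunks.  */
    for (i = 0; i < 4; i++) {
        uint64_t mask = 0xffffull << i * 16;
        if ((val & mask) == val) {
            return 0;
        }
    }
    /* A 32-bit constant loads with one LLI[LH]F insn.  */
    if (val <= 0xffffffff || (val & 0xffffffff) == 0) {
        return 0;
    }
    return 1;
}

int main(void)
{
    static const uint64_t examples[] = {
        0xfffffffffffff003ull,   /* one NILL does the AND      -> immediate */
        0x0000ffff00000000ull,   /* one LLIHL + NGR is shorter -> register  */
        0xff00ff00ff00ff00ull    /* no short load exists       -> immediate */
    };
    int i;

    for (i = 0; i < 3; i++) {
        printf("0x%016llx -> %s\n", (unsigned long long)examples[i],
               andi_is_preferred(examples[i]) ? "and-immediate" : "load + NGR");
    }
    return 0;
}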
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 795ddcd..53a92c5 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -36,6 +36,7 @@
 #define TCG_CT_CONST_32    0x100
 #define TCG_CT_CONST_NEG   0x200
 #define TCG_CT_CONST_ADDI  0x400
+#define TCG_CT_CONST_ANDI  0x800
 
 /* Several places within the instruction set 0 means "no register"
    rather than TCG_REG_R0.  */
@@ -61,6 +62,8 @@ typedef enum S390Opcode {
     RIL_LGFI    = 0xc001,
     RIL_LLIHF   = 0xc00e,
     RIL_LLILF   = 0xc00f,
+    RIL_NIHF    = 0xc00a,
+    RIL_NILF    = 0xc00b,
 
     RI_AGHI     = 0xa70b,
     RI_AHI      = 0xa70a,
@@ -74,6 +77,10 @@ typedef enum S390Opcode {
     RI_LLIHL    = 0xa50d,
     RI_LLILH    = 0xa50e,
     RI_LLILL    = 0xa50f,
+    RI_NIHH     = 0xa504,
+    RI_NIHL     = 0xa505,
+    RI_NILH     = 0xa506,
+    RI_NILL     = 0xa507,
 
     RRE_AGR     = 0xb908,
     RRE_CGR     = 0xb920,
@@ -319,6 +326,10 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         ct->ct &= ~TCG_CT_REG;
         ct->ct |= TCG_CT_CONST_ADDI;
         break;
+    case 'A':
+        ct->ct &= ~TCG_CT_REG;
+        ct->ct |= TCG_CT_CONST_ANDI;
+        break;
     default:
         break;
     }
@@ -328,9 +339,66 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
     return 0;
 }
 
+/* Immediates to be used with logical AND.  This is an optimization only,
+   since a full 64-bit immediate AND can always be performed with 4
+   sequential NI[LH][LH] instructions.  What we're looking for is
+   immediates that we can load efficiently, and the immediate load plus
+   the reg-reg AND is smaller than the sequential NI's.  */
+
+static int tcg_match_andi(int ct, tcg_target_ulong val)
+{
+    int i;
+
+    if (facilities & FACILITY_EXT_IMM) {
+        if (ct & TCG_CT_CONST_32) {
+            /* All 32-bit ANDs can be performed with 1 48-bit insn.  */
+            return 1;
+        }
+
+        /* Zero-extensions.  */
+        if (val == 0xff || val == 0xffff || val == 0xffffffff) {
+            return 1;
+        }
+    } else {
+        if (ct & TCG_CT_CONST_32) {
+            val = (uint32_t)val;
+        } else if (val == 0xffffffff) {
+            return 1;
+        }
+    }
+
+    /* Try all 32-bit insns that can perform it in one go.  */
+    for (i = 0; i < 4; i++) {
+        tcg_target_ulong mask = ~(0xffffull << i*16);
+        if ((val & mask) == mask) {
+            return 1;
+        }
+    }
+
+    /* Look for 16-bit values performing the mask.  These are better
+       to load with LLI[LH][LH].  */
+    for (i = 0; i < 4; i++) {
+        tcg_target_ulong mask = 0xffffull << i*16;
+        if ((val & mask) == val) {
+            return 0;
+        }
+    }
+
+    /* Look for 32-bit values performing the 64-bit mask.  These
+       are better to load with LLI[LH]F, or if extended immediates
+       not available, with a pair of LLI insns.  */
+    if ((ct & TCG_CT_CONST_32) == 0) {
+        if (val <= 0xffffffff || (val & 0xffffffff) == 0) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
 /* Test if a constant matches the constraint. */
-static inline int tcg_target_const_match(tcg_target_long val,
-                                         const TCGArgConstraint *arg_ct)
+static int tcg_target_const_match(tcg_target_long val,
+                                  const TCGArgConstraint *arg_ct)
 {
     int ct = arg_ct->ct;
 
@@ -357,6 +425,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
         } else {
             return val == (int16_t)val;
         }
+    } else if (ct & TCG_CT_CONST_ANDI) {
+        return tcg_match_andi(ct, val);
     }
 
     return 0;
@@ -703,6 +773,74 @@ static void tgen64_addi(TCGContext *s, TCGReg dest, int64_t val)
 }
 
 
+static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+{
+    static const S390Opcode ni_insns[4] = {
+        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
+    };
+    static const S390Opcode nif_insns[2] = {
+        RIL_NILF, RIL_NIHF
+    };
+
+    int i;
+
+    /* Look for no-op.  */
+    if (val == -1) {
+        return;
+    }
+
+    /* Look for the zero-extensions.  */
+    if (val == 0xffffffff) {
+        tgen_ext32u(s, dest, dest);
+        return;
+    }
+
+    if (facilities & FACILITY_EXT_IMM) {
+        if (val == 0xff) {
+            tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
+            return;
+        }
+        if (val == 0xffff) {
+            tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
+            return;
+        }
+
+        /* Try all 32-bit insns that can perform it in one go.  */
+        for (i = 0; i < 4; i++) {
+            tcg_target_ulong mask = ~(0xffffull << i*16);
+            if ((val & mask) == mask) {
+                tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+                return;
+            }
+        }
+
+        /* Try all 48-bit insns that can perform it in one go.  */
+        if (facilities & FACILITY_EXT_IMM) {
+            for (i = 0; i < 2; i++) {
+                tcg_target_ulong mask = ~(0xffffffffull << i*32);
+                if ((val & mask) == mask) {
+                    tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+                    return;
+                }
+            }
+        }
+
+        /* Perform the AND via sequential modifications to the high and low
+           parts.  Do this via recursion to handle 16-bit vs 32-bit masks in
+           each half.  */
+        tgen64_andi(s, dest, val | 0xffffffff00000000ull);
+        tgen64_andi(s, dest, val | 0x00000000ffffffffull);
+    } else {
+        /* With no extended-immediate facility, just emit the sequence.  */
+        for (i = 0; i < 4; i++) {
+            tcg_target_ulong mask = 0xffffull << i*16;
+            if ((val & mask) != mask) {
+                tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+            }
+        }
+    }
+}
+
 static void tgen32_cmp(TCGContext *s, TCGCond c, TCGReg r1, TCGReg r2)
 {
     if (c > TCG_COND_GT) {
@@ -776,6 +914,16 @@ static void tgen_calli(TCGContext *s, tcg_target_long dest)
 }
 
 #if defined(CONFIG_SOFTMMU)
+static void tgen64_andi_tmp(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+{
+    if (tcg_match_andi(0, val)) {
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
+        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
+    } else {
+        tgen64_andi(s, dest, val);
+    }
+}
+
 static void tcg_prepare_qemu_ldst(TCGContext* s, int data_reg, int addr_reg,
                                   int mem_index, int opc,
                                   uint16_t **label2_ptr_p, int is_store)
@@ -803,13 +951,8 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, int data_reg, int addr_reg,
     tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, TCG_REG_NONE,
                  TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
-                 TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    tcg_out_insn(s, RRE, NGR, arg0, TCG_TMP0);
-
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
-                 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-    tcg_out_insn(s, RRE, NGR, arg1, TCG_TMP0);
+    tgen64_andi_tmp(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+    tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
 
     if (is_store) {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
@@ -1178,7 +1321,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_and_i32:
-        tcg_out_insn(s, RR, NR, args[0], args[2]);
+        if (const_args[2]) {
+            tgen64_andi(s, args[0], args[2] | 0xffffffff00000000ull);
+        } else {
+            tcg_out_insn(s, RR, NR, args[0], args[2]);
+        }
         break;
     case INDEX_op_or_i32:
         tcg_out_insn(s, RR, OR, args[0], args[2]);
@@ -1188,7 +1335,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_and_i64:
-        tcg_out_insn(s, RRE, NGR, args[0], args[2]);
+        if (const_args[2]) {
+            tgen64_andi(s, args[0], args[2]);
+        } else {
+            tcg_out_insn(s, RRE, NGR, args[0], args[2]);
+        }
         break;
     case INDEX_op_or_i64:
         tcg_out_insn(s, RRE, OGR, args[0], args[2]);
@@ -1454,9 +1605,10 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } },
     { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
 
-    { INDEX_op_and_i32, { "r", "0", "r" } },
+    { INDEX_op_and_i32, { "r", "0", "rWA" } },
     { INDEX_op_or_i32, { "r", "0", "r" } },
     { INDEX_op_xor_i32, { "r", "0", "r" } },
+    { INDEX_op_neg_i32, { "r", "r" } },
 
     { INDEX_op_shl_i32, { "r", "0", "Ri" } },
@@ -1515,9 +1667,10 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } },
     { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
 
-    { INDEX_op_and_i64, { "r", "0", "r" } },
+    { INDEX_op_and_i64, { "r", "0", "rA" } },
     { INDEX_op_or_i64, { "r", "0", "r" } },
     { INDEX_op_xor_i64, { "r", "0", "r" } },
+    { INDEX_op_neg_i64, { "r", "r" } },
 
     { INDEX_op_shl_i64, { "r", "r", "Ri" } },
-- 
1.7.0.1