The TCG translator does *not* work in 32-bit mode, and this patch adds a check to query_facilities to enforce that.
However, QEMU can run in KVM mode when built in 32-bit mode, and this patch is just good enough to enable that method to continue. Signed-off-by: Richard Henderson <r...@twiddle.net> --- tcg/s390/tcg-target.c | 386 +++++++++++++++++++++++++------------------------ tcg/s390/tcg-target.h | 7 + 2 files changed, 205 insertions(+), 188 deletions(-) diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index d5c26b8..8719ed7 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -731,7 +731,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, return; } if ((uval & 0xffffffff) == 0) { - tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32); + tcg_out_insn(s, RIL, LLIHF, ret, uval >> 31 >> 1); return; } } @@ -761,7 +761,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, We first want to make sure that all the high bits get set. With luck the low 16-bits can be considered negative to perform that for free, otherwise we load an explicit -1. */ - if (sval >> 32 == -1) { + if (sval >> 31 >> 1 == -1) { if (uval & 0x8000) { tcg_out_insn(s, RI, LGHI, ret, uval); } else { @@ -779,7 +779,7 @@ static void tcg_out_movi(TCGContext *s, TCGType type, tcg_out_movi(s, TCG_TYPE_I32, ret, sval); /* Insert data into the high 32-bits. 
*/ - uval >>= 32; + uval = uval >> 31 >> 1; if (facilities & FACILITY_EXT_IMM) { if (uval < 0x10000) { tcg_out_insn(s, RI, IIHL, ret, uval); @@ -962,7 +962,7 @@ static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src) tcg_out_insn(s, RRE, LLGFR, dest, src); } -static void tgen32_addi(TCGContext *s, TCGReg dest, int32_t val) +static inline void tgen32_addi(TCGContext *s, TCGReg dest, int32_t val) { if (val == (int16_t)val) { tcg_out_insn(s, RI, AHI, dest, val); @@ -971,7 +971,7 @@ static void tgen32_addi(TCGContext *s, TCGReg dest, int32_t val) } } -static void tgen64_addi(TCGContext *s, TCGReg dest, int64_t val) +static inline void tgen64_addi(TCGContext *s, TCGReg dest, int64_t val) { if (val == (int16_t)val) { tcg_out_insn(s, RI, AGHI, dest, val); @@ -1112,7 +1112,7 @@ static void tgen64_xori(TCGContext *s, TCGReg dest, tcg_target_ulong val) tcg_out_insn(s, RIL, XILF, dest, val); } if (val > 0xffffffff) { - tcg_out_insn(s, RIL, XIHF, dest, val >> 32); + tcg_out_insn(s, RIL, XIHF, dest, val >> 31 >> 1); } } @@ -1593,6 +1593,15 @@ static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, int opc) #endif } +#if TCG_TARGET_REG_BITS == 64 +# define OP_32_64(x) \ + case glue(glue(INDEX_op_,x),_i32): \ + case glue(glue(INDEX_op_,x),_i64) +#else +# define OP_32_64(x) \ + case glue(glue(INDEX_op_,x),_i32) +#endif + static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { @@ -1625,21 +1634,18 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; - case INDEX_op_ld8u_i32: - case INDEX_op_ld8u_i64: + OP_32_64(ld8u): /* ??? LLC (RXY format) is only present with the extended-immediate facility, whereas LLGC is always present. */ tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]); break; - case INDEX_op_ld8s_i32: - case INDEX_op_ld8s_i64: + OP_32_64(ld8s): /* ??? LB is no smaller than LGB, so no point to using it. 
*/ tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]); break; - case INDEX_op_ld16u_i32: - case INDEX_op_ld16u_i64: + OP_32_64(ld16u): /* ??? LLH (RXY format) is only present with the extended-immediate facility, whereas LLGH is always present. */ tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]); @@ -1648,45 +1654,25 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ld16s_i32: tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]); break; - case INDEX_op_ld16s_i64: - tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]); - break; case INDEX_op_ld_i32: tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_ld32u_i64: - tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]); - break; - case INDEX_op_ld32s_i64: - tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]); - break; - - case INDEX_op_ld_i64: - tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); - break; - case INDEX_op_st8_i32: - case INDEX_op_st8_i64: + OP_32_64(st8): tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1], TCG_REG_NONE, args[2]); break; - case INDEX_op_st16_i32: - case INDEX_op_st16_i64: + OP_32_64(st16): tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1], TCG_REG_NONE, args[2]); break; case INDEX_op_st_i32: - case INDEX_op_st32_i64: tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); break; - case INDEX_op_st_i64: - tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); - break; - case INDEX_op_add_i32: if (const_args[2]) { tgen32_addi(s, args[0], args[2]); @@ -1694,14 +1680,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_insn(s, RR, AR, args[0], args[2]); } break; - case INDEX_op_add_i64: - if (const_args[2]) { - tgen64_addi(s, args[0], args[2]); - } else { - tcg_out_insn(s, RRE, AGR, args[0], args[2]); - } - break; - case INDEX_op_sub_i32: if (const_args[2]) { tgen32_addi(s, args[0], -args[2]); @@ -1709,13 +1687,6 
@@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_insn(s, RR, SR, args[0], args[2]); } break; - case INDEX_op_sub_i64: - if (const_args[2]) { - tgen64_addi(s, args[0], -args[2]); - } else { - tcg_out_insn(s, RRE, SGR, args[0], args[2]); - } - break; case INDEX_op_and_i32: if (const_args[2]) { @@ -1739,34 +1710,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; - case INDEX_op_and_i64: - if (const_args[2]) { - tgen64_andi(s, args[0], args[2]); - } else { - tcg_out_insn(s, RRE, NGR, args[0], args[2]); - } - break; - case INDEX_op_or_i64: - if (const_args[2]) { - tgen64_ori(s, args[0], args[2]); - } else { - tcg_out_insn(s, RRE, OGR, args[0], args[2]); - } - break; - case INDEX_op_xor_i64: - if (const_args[2]) { - tgen64_xori(s, args[0], args[2]); - } else { - tcg_out_insn(s, RRE, XGR, args[0], args[2]); - } - break; - case INDEX_op_neg_i32: tcg_out_insn(s, RR, LCR, args[0], args[1]); break; - case INDEX_op_neg_i64: - tcg_out_insn(s, RRE, LCGR, args[0], args[1]); - break; case INDEX_op_mul_i32: if (const_args[2]) { @@ -1779,17 +1725,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_insn(s, RRE, MSR, args[0], args[2]); } break; - case INDEX_op_mul_i64: - if (const_args[2]) { - if (args[2] == (int16_t)args[2]) { - tcg_out_insn(s, RI, MGHI, args[0], args[2]); - } else { - tcg_out_insn(s, RIL, MSGFI, args[0], args[2]); - } - } else { - tcg_out_insn(s, RRE, MSGR, args[0], args[2]); - } - break; case INDEX_op_div2_i32: tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]); @@ -1798,17 +1733,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]); break; - case INDEX_op_div2_i64: - /* ??? We get an unnecessary sign-extension of the dividend - into R3 with this definition, but as we do in fact always - produce both quotient and remainder using INDEX_op_div_i64 - instead requires jumping through even more hoops. 
*/ - tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]); - break; - case INDEX_op_divu2_i64: - tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]); - break; - case INDEX_op_shl_i32: op = RS_SLL; do_shift32: @@ -1825,22 +1749,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, op = RS_SRA; goto do_shift32; - case INDEX_op_shl_i64: - op = RSY_SLLG; - do_shift64: - if (const_args[2]) { - tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]); - } else { - tcg_out_sh64(s, op, args[0], args[1], args[2], 0); - } - break; - case INDEX_op_shr_i64: - op = RSY_SRLG; - goto do_shift64; - case INDEX_op_sar_i64: - op = RSY_SRAG; - goto do_shift64; - case INDEX_op_rotl_i32: /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. */ if (const_args[2]) { @@ -1859,72 +1767,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; - case INDEX_op_rotl_i64: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], - TCG_REG_NONE, args[2]); - } else { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0); - } - break; - case INDEX_op_rotr_i64: - if (const_args[2]) { - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], - TCG_REG_NONE, (64 - args[2]) & 63); - } else { - /* We can use the smaller 32-bit negate because only the - low 6 bits are examined for the rotate. 
*/ - tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); - tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0); - } - break; - case INDEX_op_ext8s_i32: tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]); break; - case INDEX_op_ext8s_i64: - tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]); - break; case INDEX_op_ext16s_i32: tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]); break; - case INDEX_op_ext16s_i64: - tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_ext32s_i64: - tgen_ext32s(s, args[0], args[1]); - break; - case INDEX_op_ext8u_i32: tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]); break; - case INDEX_op_ext8u_i64: - tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]); - break; case INDEX_op_ext16u_i32: tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]); break; - case INDEX_op_ext16u_i64: - tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]); - break; - case INDEX_op_ext32u_i64: - tgen_ext32u(s, args[0], args[1]); - break; - case INDEX_op_bswap16_i32: - case INDEX_op_bswap16_i64: + OP_32_64(bswap16): /* The TCG bswap definition requires bits 0-47 already be zero. Thus we don't need the G-type insns to implement bswap16_i64. 
*/ tcg_out_insn(s, RRE, LRVR, args[0], args[1]); tcg_out_sh32(s, RS_SRL, args[0], TCG_REG_NONE, 16); break; - case INDEX_op_bswap32_i32: - case INDEX_op_bswap32_i64: + OP_32_64(bswap32): tcg_out_insn(s, RRE, LRVR, args[0], args[1]); break; - case INDEX_op_bswap64_i64: - tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); - break; case INDEX_op_br: tgen_branch(s, S390_CC_ALWAYS, args[0]); @@ -1934,46 +1798,27 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tgen_brcond(s, TCG_TYPE_I32, args[2], args[0], args[1], const_args[1], args[3]); break; - case INDEX_op_brcond_i64: - tgen_brcond(s, TCG_TYPE_I64, args[2], args[0], - args[1], const_args[1], args[3]); - break; - case INDEX_op_setcond_i32: tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2], const_args[2]); break; - case INDEX_op_setcond_i64: - tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], - args[2], const_args[2]); - break; case INDEX_op_qemu_ld8u: tcg_out_qemu_ld(s, args, LD_UINT8); break; - case INDEX_op_qemu_ld8s: tcg_out_qemu_ld(s, args, LD_INT8); break; - case INDEX_op_qemu_ld16u: tcg_out_qemu_ld(s, args, LD_UINT16); break; - case INDEX_op_qemu_ld16s: tcg_out_qemu_ld(s, args, LD_INT16); break; - case INDEX_op_qemu_ld32: /* ??? Technically we can use a non-extending instruction. 
*/ - case INDEX_op_qemu_ld32u: tcg_out_qemu_ld(s, args, LD_UINT32); break; - - case INDEX_op_qemu_ld32s: - tcg_out_qemu_ld(s, args, LD_INT32); - break; - case INDEX_op_qemu_ld64: tcg_out_qemu_ld(s, args, LD_UINT64); break; @@ -1981,23 +1826,178 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_qemu_st8: tcg_out_qemu_st(s, args, LD_UINT8); break; - case INDEX_op_qemu_st16: tcg_out_qemu_st(s, args, LD_UINT16); break; - case INDEX_op_qemu_st32: tcg_out_qemu_st(s, args, LD_UINT32); break; - case INDEX_op_qemu_st64: tcg_out_qemu_st(s, args, LD_UINT64); break; - case INDEX_op_mov_i32: - case INDEX_op_mov_i64: - case INDEX_op_movi_i32: - case INDEX_op_movi_i64: +#if TCG_TARGET_REG_BITS == 64 + case INDEX_op_ld16s_i64: + tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]); + break; + case INDEX_op_ld32u_i64: + tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]); + break; + case INDEX_op_ld32s_i64: + tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]); + break; + case INDEX_op_ld_i64: + tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + + case INDEX_op_st32_i64: + tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + case INDEX_op_st_i64: + tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + + case INDEX_op_add_i64: + if (const_args[2]) { + tgen64_addi(s, args[0], args[2]); + } else { + tcg_out_insn(s, RRE, AGR, args[0], args[2]); + } + break; + case INDEX_op_sub_i64: + if (const_args[2]) { + tgen64_addi(s, args[0], -args[2]); + } else { + tcg_out_insn(s, RRE, SGR, args[0], args[2]); + } + break; + + case INDEX_op_and_i64: + if (const_args[2]) { + tgen64_andi(s, args[0], args[2]); + } else { + tcg_out_insn(s, RRE, NGR, args[0], args[2]); + } + break; + case INDEX_op_or_i64: + if (const_args[2]) { + tgen64_ori(s, args[0], args[2]); + } else { + tcg_out_insn(s, RRE, OGR, args[0], args[2]); + } + break; + case INDEX_op_xor_i64: + if (const_args[2]) { + 
tgen64_xori(s, args[0], args[2]); + } else { + tcg_out_insn(s, RRE, XGR, args[0], args[2]); + } + break; + + case INDEX_op_neg_i64: + tcg_out_insn(s, RRE, LCGR, args[0], args[1]); + break; + case INDEX_op_bswap64_i64: + tcg_out_insn(s, RRE, LRVGR, args[0], args[1]); + break; + + case INDEX_op_mul_i64: + if (const_args[2]) { + if (args[2] == (int16_t)args[2]) { + tcg_out_insn(s, RI, MGHI, args[0], args[2]); + } else { + tcg_out_insn(s, RIL, MSGFI, args[0], args[2]); + } + } else { + tcg_out_insn(s, RRE, MSGR, args[0], args[2]); + } + break; + + case INDEX_op_div2_i64: + /* ??? We get an unnecessary sign-extension of the dividend + into R3 with this definition, but as we do in fact always + produce both quotient and remainder using INDEX_op_div_i64 + instead requires jumping through even more hoops. */ + tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]); + break; + case INDEX_op_divu2_i64: + tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]); + break; + + case INDEX_op_shl_i64: + op = RSY_SLLG; + do_shift64: + if (const_args[2]) { + tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]); + } else { + tcg_out_sh64(s, op, args[0], args[1], args[2], 0); + } + break; + case INDEX_op_shr_i64: + op = RSY_SRLG; + goto do_shift64; + case INDEX_op_sar_i64: + op = RSY_SRAG; + goto do_shift64; + + case INDEX_op_rotl_i64: + if (const_args[2]) { + tcg_out_sh64(s, RSY_RLLG, args[0], args[1], + TCG_REG_NONE, args[2]); + } else { + tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0); + } + break; + case INDEX_op_rotr_i64: + if (const_args[2]) { + tcg_out_sh64(s, RSY_RLLG, args[0], args[1], + TCG_REG_NONE, (64 - args[2]) & 63); + } else { + /* We can use the smaller 32-bit negate because only the + low 6 bits are examined for the rotate. 
*/ + tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]); + tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0); + } + break; + + case INDEX_op_ext8s_i64: + tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_ext16s_i64: + tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_ext32s_i64: + tgen_ext32s(s, args[0], args[1]); + break; + case INDEX_op_ext8u_i64: + tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_ext16u_i64: + tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_ext32u_i64: + tgen_ext32u(s, args[0], args[1]); + break; + + case INDEX_op_brcond_i64: + tgen_brcond(s, TCG_TYPE_I64, args[2], args[0], + args[1], const_args[1], args[3]); + break; + case INDEX_op_setcond_i64: + tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], + args[2], const_args[2]); + break; + + case INDEX_op_qemu_ld32u: + tcg_out_qemu_ld(s, args, LD_UINT32); + break; + case INDEX_op_qemu_ld32s: + tcg_out_qemu_ld(s, args, LD_INT32); + break; +#endif /* TCG_TARGET_REG_BITS == 64 */ + + OP_32_64(mov): + OP_32_64(movi): /* These are always emitted by TCG directly. */ case INDEX_op_jmp: /* This one is obsolete and never emitted. */ @@ -2064,8 +2064,6 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_qemu_ld8s, { "r", "L" } }, { INDEX_op_qemu_ld16u, { "r", "L" } }, { INDEX_op_qemu_ld16s, { "r", "L" } }, - { INDEX_op_qemu_ld32u, { "r", "L" } }, - { INDEX_op_qemu_ld32s, { "r", "L" } }, { INDEX_op_qemu_ld32, { "r", "L" } }, { INDEX_op_qemu_ld64, { "r", "L" } }, @@ -2124,6 +2122,9 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_brcond_i64, { "r", "rC" } }, { INDEX_op_setcond_i64, { "r", "r", "rC" } }, + + { INDEX_op_qemu_ld32u, { "r", "L" } }, + { INDEX_op_qemu_ld32s, { "r", "L" } }, #endif { -1 }, @@ -2217,13 +2218,22 @@ static void query_facilities(void) worthwhile, since even the KVM target requires z/Arch. 
*/ fail = 0; if ((facilities & FACILITY_ZARCH_ACTIVE) == 0) { - fprintf(stderr, "TCG: z/Arch facility is required\n"); + fprintf(stderr, "TCG: z/Arch facility is required.\n"); + fprintf(stderr, "TCG: Boot with a 64-bit enabled kernel.\n"); fail = 1; } if ((facilities & FACILITY_LONG_DISP) == 0) { - fprintf(stderr, "TCG: long-displacement facility is required\n"); + fprintf(stderr, "TCG: long-displacement facility is required.\n"); fail = 1; } + + /* So far there's just enough support for 31-bit mode to let the + compile succeed. This is good enough to run QEMU with KVM. */ + if (sizeof(void *) != 8) { + fprintf(stderr, "TCG: 31-bit mode is not supported.\n"); + fail = 1; + } + if (fail) { exit(-1); } diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 390c587..4e45cf3 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -23,7 +23,12 @@ */ #define TCG_TARGET_S390 1 +#ifdef __s390x__ #define TCG_TARGET_REG_BITS 64 +#else +#define TCG_TARGET_REG_BITS 32 +#endif + #define TCG_TARGET_WORDS_BIGENDIAN typedef enum TCGReg { @@ -64,6 +69,7 @@ typedef enum TCGReg { // #define TCG_TARGET_HAS_nand_i32 // #define TCG_TARGET_HAS_nor_i32 +#if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_div2_i64 #define TCG_TARGET_HAS_rot_i64 #define TCG_TARGET_HAS_ext8s_i64 @@ -82,6 +88,7 @@ typedef enum TCGReg { // #define TCG_TARGET_HAS_eqv_i64 // #define TCG_TARGET_HAS_nand_i64 // #define TCG_TARGET_HAS_nor_i64 +#endif #define TCG_TARGET_HAS_GUEST_BASE -- 1.7.0.1