We already have stubs for a TCG target on S390, but we were missing the code that actually generates instructions.
So I took Uli's patch, cleaned it up and present it to you again :-). I hope I found all odd coding style and unprettiness issues, but if you still spot one feel free to nag about it. Signed-off-by: Alexander Graf <ag...@suse.de> CC: Uli Hecht <u...@suse.de> Signed-off-by: Richard Henderson <r...@twiddle.net> --- tcg/s390/tcg-target.c | 1171 ++++++++++++++++++++++++++++++++++++++++++++++++- tcg/s390/tcg-target.h | 13 +- 2 files changed, 1157 insertions(+), 27 deletions(-) diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index 265194a..55f0fa9 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -2,6 +2,7 @@ * Tiny Code Generator for QEMU * * Copyright (c) 2009 Ulrich Hecht <u...@suse.de> + * Copyright (c) 2009 Alexander Graf <ag...@suse.de> * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -22,81 +23,1209 @@ * THE SOFTWARE. */ +/* #define DEBUG_S390_TCG */ + +#ifdef DEBUG_S390_TCG +#define dprintf(fmt, ...) \ + do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) +#else +#define dprintf(fmt, ...) \ + do { } while (0) +#endif + +#define TCG_CT_CONST_S16 0x100 +#define TCG_CT_CONST_U12 0x200 + +/* Several places within the instruction set 0 means "no register" + rather than TCG_REG_R0. */ +#define TCG_REG_NONE 0 + +/* All of the following instructions are prefixed with their instruction + format, and are defined as 8- or 16-bit quantities, even when the two + halves of the 16-bit quantity may appear 32 bits apart in the insn. + This makes it easy to copy the values from the tables in Appendix B. 
*/ +typedef enum S390Opcode { + RIL_BRASL = 0xc005, + RIL_BRCL = 0xc004, + RIL_LARL = 0xc000, + + RI_AGHI = 0xa70b, + RI_AHI = 0xa70a, + RI_BRC = 0xa704, + RI_IILH = 0xa502, + RI_LGHI = 0xa709, + RI_LLILL = 0xa50f, + + RRE_AGR = 0xb908, + RRE_CGR = 0xb920, + RRE_CLGR = 0xb921, + RRE_DLGR = 0xb987, + RRE_DLR = 0xb997, + RRE_DSGFR = 0xb91d, + RRE_DSGR = 0xb90d, + RRE_LCGR = 0xb903, + RRE_LGFR = 0xb914, + RRE_LGR = 0xb904, + RRE_LLGFR = 0xb916, + RRE_MSGR = 0xb90c, + RRE_MSR = 0xb252, + RRE_NGR = 0xb980, + RRE_OGR = 0xb981, + RRE_SGR = 0xb909, + RRE_XGR = 0xb982, + + RR_AR = 0x1a, + RR_BASR = 0x0d, + RR_BCR = 0x07, + RR_CLR = 0x15, + RR_CR = 0x19, + RR_DR = 0x1d, + RR_LCR = 0x13, + RR_LR = 0x18, + RR_NR = 0x14, + RR_OR = 0x16, + RR_SR = 0x1b, + RR_XR = 0x17, + + RSY_SLLG = 0xeb0d, + RSY_SRAG = 0xeb0a, + RSY_SRLG = 0xeb0c, + + RS_SLL = 0x89, + RS_SRA = 0x8a, + RS_SRL = 0x88, + + RXY_CG = 0xe320, + RXY_LB = 0xe376, + RXY_LG = 0xe304, + RXY_LGB = 0xe377, + RXY_LGF = 0xe314, + RXY_LGH = 0xe315, + RXY_LHY = 0xe378, + RXY_LLC = 0xe394, + RXY_LLGC = 0xe390, + RXY_LLGF = 0xe316, + RXY_LLGH = 0xe391, + RXY_LLH = 0xe395, + RXY_LMG = 0xeb04, + RXY_LRV = 0xe31e, + RXY_LRVG = 0xe30f, + RXY_LRVH = 0xe31f, + RXY_LY = 0xe358, + RXY_STCY = 0xe372, + RXY_STG = 0xe324, + RXY_STHY = 0xe370, + RXY_STMG = 0xeb24, + RXY_STRV = 0xe33e, + RXY_STRVG = 0xe32f, + RXY_STRVH = 0xe33f, + RXY_STY = 0xe350, + + RX_L = 0x58, + RX_LH = 0x48, + RX_ST = 0x50, + RX_STC = 0x42, + RX_STH = 0x40, +} S390Opcode; + +#define LD_SIGNED 0x04 +#define LD_UINT8 0x00 +#define LD_INT8 (LD_UINT8 | LD_SIGNED) +#define LD_UINT16 0x01 +#define LD_INT16 (LD_UINT16 | LD_SIGNED) +#define LD_UINT32 0x02 +#define LD_INT32 (LD_UINT32 | LD_SIGNED) +#define LD_UINT64 0x03 +#define LD_INT64 (LD_UINT64 | LD_SIGNED) + +#ifndef NDEBUG +static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { + "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", + "%r8", "%r9", "%r10" "%r11" "%r12" "%r13" "%r14" "%r15" +}; +#endif + 
static const int tcg_target_reg_alloc_order[] = { + TCG_REG_R6, + TCG_REG_R7, + TCG_REG_R8, + TCG_REG_R9, + TCG_REG_R10, + TCG_REG_R11, + TCG_REG_R12, + TCG_REG_R13, + TCG_REG_R14, + TCG_REG_R0, + TCG_REG_R1, + TCG_REG_R2, + TCG_REG_R3, + TCG_REG_R4, + TCG_REG_R5, }; static const int tcg_target_call_iarg_regs[] = { + TCG_REG_R2, + TCG_REG_R3, + TCG_REG_R4, + TCG_REG_R5, + TCG_REG_R6, }; static const int tcg_target_call_oarg_regs[] = { + TCG_REG_R2, + TCG_REG_R3, +}; + +/* signed/unsigned is handled by using COMPARE and COMPARE LOGICAL, + respectively */ + +#define S390_CC_EQ 8 +#define S390_CC_LT 4 +#define S390_CC_GT 2 +#define S390_CC_OV 1 +#define S390_CC_NE (S390_CC_LT | S390_CC_GT) +#define S390_CC_LE (S390_CC_LT | S390_CC_EQ) +#define S390_CC_GE (S390_CC_GT | S390_CC_EQ) +#define S390_CC_ALWAYS 15 + +static const uint8_t tcg_cond_to_s390_cond[10] = { + [TCG_COND_EQ] = S390_CC_EQ, + [TCG_COND_LT] = S390_CC_LT, + [TCG_COND_LTU] = S390_CC_LT, + [TCG_COND_LE] = S390_CC_LE, + [TCG_COND_LEU] = S390_CC_LE, + [TCG_COND_GT] = S390_CC_GT, + [TCG_COND_GTU] = S390_CC_GT, + [TCG_COND_GE] = S390_CC_GE, + [TCG_COND_GEU] = S390_CC_GE, + [TCG_COND_NE] = S390_CC_NE, +}; + +#ifdef CONFIG_SOFTMMU + +#include "../../softmmu_defs.h" + +static void *qemu_ld_helpers[4] = { + __ldb_mmu, + __ldw_mmu, + __ldl_mmu, + __ldq_mmu, +}; + +static void *qemu_st_helpers[4] = { + __stb_mmu, + __stw_mmu, + __stl_mmu, + __stq_mmu, }; +#endif + +static uint8_t *tb_ret_addr; static void patch_reloc(uint8_t *code_ptr, int type, tcg_target_long value, tcg_target_long addend) { - tcg_abort(); + uint32_t *code_ptr_32 = (uint32_t*)code_ptr; + tcg_target_long code_ptr_tlong = (tcg_target_long)code_ptr; + + switch (type) { + case R_390_PC32DBL: + *code_ptr_32 = (value - (code_ptr_tlong + addend)) >> 1; + break; + default: + tcg_abort(); + break; + } } -static inline int tcg_target_get_call_iarg_regs_count(int flags) +static int tcg_target_get_call_iarg_regs_count(int flags) { - tcg_abort(); - return 0; + 
return sizeof(tcg_target_call_iarg_regs) / sizeof(int); } /* parse target specific constraints */ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) { - tcg_abort(); + const char *ct_str; + + ct->ct |= TCG_CT_REG; + tcg_regset_set32(ct->u.regs, 0, 0xffff); + ct_str = *pct_str; + + switch (ct_str[0]) { + case 'L': /* qemu_ld/st constraint */ + tcg_regset_reset_reg (ct->u.regs, TCG_REG_R2); + tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3); + break; + case 'R': /* not R0 */ + tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0); + break; + case 'a': /* force R2 for division */ + tcg_regset_clear(ct->u.regs); + tcg_regset_set_reg(ct->u.regs, TCG_REG_R2); + break; + case 'b': /* force R3 for division */ + tcg_regset_clear(ct->u.regs); + tcg_regset_set_reg(ct->u.regs, TCG_REG_R3); + break; + case 'I': + ct->ct &= ~TCG_CT_REG; + ct->ct |= TCG_CT_CONST_S16; + break; + default: + break; + } + ct_str++; + *pct_str = ct_str; + return 0; } /* Test if a constant matches the constraint. */ static inline int tcg_target_const_match(tcg_target_long val, - const TCGArgConstraint *arg_ct) + const TCGArgConstraint *arg_ct) { - tcg_abort(); + int ct = arg_ct->ct; + + if ((ct & TCG_CT_CONST) || + ((ct & TCG_CT_CONST_S16) && val == (int16_t)val) || + ((ct & TCG_CT_CONST_U12) && val == (val & 0xfff))) { + return 1; + } + return 0; } +/* Emit instructions according to the given instruction format. 
*/ + +static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2) +{ + tcg_out16(s, (op << 8) | (r1 << 4) | r2); +} + +static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op, + TCGReg r1, TCGReg r2) +{ + tcg_out32(s, (op << 16) | (r1 << 4) | r2); +} + +static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2) +{ + tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff)); +} + +static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2) +{ + tcg_out16(s, op | (r1 << 4)); + tcg_out32(s, i2); +} + +static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1, + TCGReg b2, TCGReg r3, int disp) +{ + tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12) + | (disp & 0xfff)); +} + +static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1, + TCGReg b2, TCGReg r3, int disp) +{ + tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3); + tcg_out32(s, (op & 0xff) | (b2 << 28) + | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4)); +} + +#define tcg_out_insn_RX tcg_out_insn_RS +#define tcg_out_insn_RXY tcg_out_insn_RSY + +/* Emit an opcode with "type-checking" of the format. */ +#define tcg_out_insn(S, FMT, OP, ...) \ + glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__) + + +/* emit 64-bit shifts */ +static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest, + TCGReg src, TCGReg sh_reg, int sh_imm) +{ + tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm); +} + +/* emit 32-bit shifts */ +static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest, + TCGReg sh_reg, int sh_imm) +{ + tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm); +} + +static inline void tcg_out_mov(TCGContext *s, int ret, int arg) +{ + /* ??? With a TCGType argument, we could emit the smaller LR insn. 
*/ + tcg_out_insn(s, RRE, LGR, ret, arg); +} + /* load a register with an immediate value */ static inline void tcg_out_movi(TCGContext *s, TCGType type, int ret, tcg_target_long arg) { - tcg_abort(); + if (arg >= -0x8000 && arg < 0x8000) { /* signed immediate load */ + tcg_out_insn(s, RI, LGHI, ret, arg); + } else if (!(arg & 0xffffffffffff0000UL)) { + tcg_out_insn(s, RI, LLILL, ret, arg); + } else if (!(arg & 0xffffffff00000000UL) || type == TCG_TYPE_I32) { + tcg_out_insn(s, RI, LLILL, ret, arg); + tcg_out_insn(s, RI, IILH, ret, arg >> 16); + } else { + /* branch over constant and store its address in R13 */ + tcg_out_insn(s, RIL, BRASL, TCG_REG_R13, (6 + 8) >> 1); + /* 64-bit constant */ + tcg_out32(s, arg >> 32); + tcg_out32(s, arg); + /* load constant to ret */ + tcg_out_insn(s, RXY, LG, ret, TCG_REG_R13, 0, 0); + } } + +/* Emit a load/store type instruction. Inputs are: + DATA: The register to be loaded or stored. + BASE+OFS: The effective address. + OPC_RX: If the operation has an RX format opcode (e.g. STC), otherwise 0. + OPC_RXY: The RXY format opcode for the operation (e.g. STCY). */ + +static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy, + TCGReg data, TCGReg base, TCGReg index, + tcg_target_long ofs) +{ + if (ofs < -0x80000 || ofs >= 0x80000) { + /* Combine the low 16 bits of the offset with the actual load insn; + the high 48 bits must come from an immediate load. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R13, ofs & ~0xffff); + ofs &= 0xffff; + + /* If we were already given an index register, add it in. 
*/ + if (index != TCG_REG_NONE) { + tcg_out_insn(s, RRE, AGR, TCG_REG_R13, index); + } + index = TCG_REG_R13; + } + + if (opc_rx && ofs >= 0 && ofs < 0x1000) { + tcg_out_insn_RX(s, opc_rx, data, base, index, ofs); + } else { + tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs); + } +} + + /* load data without address translation or endianness conversion */ -static inline void tcg_out_ld(TCGContext *s, TCGType type, int arg, - int arg1, tcg_target_long arg2) +static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, tcg_target_long ofs) { - tcg_abort(); + if (type == TCG_TYPE_I32) { + tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs); + } else { + tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs); + } } -static inline void tcg_out_st(TCGContext *s, TCGType type, int arg, - int arg1, tcg_target_long arg2) +static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data, + TCGReg base, tcg_target_long ofs) { - tcg_abort(); + if (type == TCG_TYPE_I32) { + tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs); + } else { + tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs); + } +} + +static void tgen32_cmp(TCGContext *s, TCGCond c, TCGReg r1, TCGReg r2) +{ + if (c > TCG_COND_GT) { + /* unsigned */ + tcg_out_insn(s, RR, CLR, r1, r2); + } else { + /* signed */ + tcg_out_insn(s, RR, CR, r1, r2); + } +} + +static void tgen64_cmp(TCGContext *s, TCGCond c, TCGReg r1, TCGReg r2) +{ + if (c > TCG_COND_GT) { + /* unsigned */ + tcg_out_insn(s, RRE, CLGR, r1, r2); + } else { + /* signed */ + tcg_out_insn(s, RRE, CGR, r1, r2); + } +} + +static void tgen_setcond(TCGContext *s, TCGType type, TCGCond c, + TCGReg dest, TCGReg r1, TCGReg r2) +{ + if (type == TCG_TYPE_I32) { + tgen32_cmp(s, c, r1, r2); + } else { + tgen64_cmp(s, c, r1, r2); + } + /* Emit: r1 = 1; if (cc) goto over; r1 = 0; over: */ + tcg_out_movi(s, type, dest, 1); + tcg_out_insn(s, RI, BRC, tcg_cond_to_s390_cond[c], (4 + 4) >> 1); + tcg_out_movi(s, 
type, dest, 0); +} + +static void tgen_gotoi(TCGContext *s, int cc, tcg_target_long dest) +{ + tcg_target_long off = (dest - (tcg_target_long)s->code_ptr) >> 1; + if (off > -0x8000 && off < 0x7fff) { + tcg_out_insn(s, RI, BRC, cc, off); + } else if (off == (int32_t)off) { + tcg_out_insn(s, RIL, BRCL, cc, off); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R13, dest); + tcg_out_insn(s, RR, BCR, cc, TCG_REG_R13); + } +} + +static void tgen_branch(TCGContext *s, int cc, int labelno) +{ + TCGLabel* l = &s->labels[labelno]; + if (l->has_value) { + tgen_gotoi(s, cc, l->u.value); + } else { + tcg_out16(s, RIL_BRCL | (cc << 4)); + tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, labelno, -2); + s->code_ptr += 4; + } +} + +static void tgen_calli(TCGContext *s, tcg_target_long dest) +{ + tcg_target_long off = (dest - (tcg_target_long)s->code_ptr) >> 1; + if (off == (int32_t)off) { + tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R13, dest); + tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_REG_R13); + } +} + +#if defined(CONFIG_SOFTMMU) +static void tcg_prepare_qemu_ldst(TCGContext* s, int data_reg, int addr_reg, + int mem_index, int opc, + uint16_t **label2_ptr_p, int is_store) + { + int arg0 = TCG_REG_R2; + int arg1 = TCG_REG_R3; + int arg2 = TCG_REG_R4; + int s_bits; + uint16_t *label1_ptr; + + if (is_store) { + s_bits = opc; + } else { + s_bits = opc & 3; + } + +#if TARGET_LONG_BITS == 32 + tcg_out_insn(s, RRE, LLGFR, arg1, addr_reg); + tcg_out_insn(s, RRE, LLGFR, arg0, addr_reg); +#else + tcg_out_mov(s, arg1, addr_reg); + tcg_out_mov(s, arg0, addr_reg); +#endif + + tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, TCG_REG_NONE, + TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); + + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R13, + TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + tcg_out_insn(s, RRE, NGR, arg0, TCG_REG_R13); + + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R13, + (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); + tcg_out_insn(s, RRE, NGR, arg1, 
TCG_REG_R13); + + if (is_store) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R13, + offsetof(CPUState, tlb_table[mem_index][0].addr_write)); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R13, + offsetof(CPUState, tlb_table[mem_index][0].addr_read)); + } + tcg_out_insn(s, RRE, AGR, arg1, TCG_REG_R13); + + tcg_out_insn(s, RRE, AGR, arg1, TCG_AREG0); + + tcg_out_insn(s, RXY, CG, arg0, arg1, 0, 0); + + label1_ptr = (uint16_t*)s->code_ptr; + + /* je label1 (offset will be patched in later) */ + tcg_out_insn(s, RI, BRC, S390_CC_EQ, 0); + + /* call load/store helper */ +#if TARGET_LONG_BITS == 32 + tcg_out_insn(s, RRE, LLGFR, arg0, addr_reg); +#else + tcg_out_mov(s, arg0, addr_reg); +#endif + + if (is_store) { + tcg_out_mov(s, arg1, data_reg); + tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index); + tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]); + } else { + tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index); + tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]); + + /* sign extension */ + switch (opc) { + case LD_INT8: + tcg_out_insn(s, RSY, SLLG, data_reg, arg0, TCG_REG_NONE, 56); + tcg_out_insn(s, RSY, SRAG, data_reg, data_reg, TCG_REG_NONE, 56); + break; + case LD_INT16: + tcg_out_insn(s, RSY, SLLG, data_reg, arg0, TCG_REG_NONE, 48); + tcg_out_insn(s, RSY, SRAG, data_reg, data_reg, TCG_REG_NONE, 48); + break; + case LD_INT32: + tcg_out_insn(s, RRE, LGFR, data_reg, arg0); + break; + default: + /* unsigned -> just copy */ + tcg_out_mov(s, data_reg, arg0); + break; + } + } + + /* jump to label2 (end) */ + *label2_ptr_p = (uint16_t*)s->code_ptr; + + tcg_out_insn(s, RI, BRC, S390_CC_ALWAYS, 0); + + /* this is label1, patch branch */ + *(label1_ptr + 1) = ((unsigned long)s->code_ptr - + (unsigned long)label1_ptr) >> 1; + + if (is_store) { + tcg_out_insn(s, RXY, LG, arg1, arg1, 0, + offsetof(CPUTLBEntry, addend) + - offsetof(CPUTLBEntry, addr_write)); + } else { + tcg_out_insn(s, RXY, LG, arg1, arg1, 0, + offsetof(CPUTLBEntry, addend) + - offsetof(CPUTLBEntry, 
addr_read)); + } + +#if TARGET_LONG_BITS == 32 + /* zero upper 32 bits */ + tcg_out_insn(s, RRE, LLGFR, arg0, addr_reg); +#else + /* just copy */ + tcg_out_mov(s, arg0, addr_reg); +#endif + tcg_out_insn(s, RRE, AGR, arg0, arg1); +} + +static void tcg_finish_qemu_ldst(TCGContext* s, uint16_t *label2_ptr) +{ + /* patch branch */ + *(label2_ptr + 1) = ((unsigned long)s->code_ptr - + (unsigned long)label2_ptr) >> 1; +} + +#else /* CONFIG_SOFTMMU */ + +static void tcg_prepare_qemu_ldst(TCGContext* s, int data_reg, int addr_reg, + int mem_index, int opc, + uint16_t **label2_ptr_p, int is_store) +{ + int arg0 = TCG_REG_R2; + + /* user mode, no address translation required */ + if (TARGET_LONG_BITS == 32) { + tcg_out_insn(s, RRE, LLGFR, arg0, addr_reg); + } else { + tcg_out_mov(s, arg0, addr_reg); + } +} + +static void tcg_finish_qemu_ldst(TCGContext* s, uint16_t *label2_ptr) +{ +} + +#endif /* CONFIG_SOFTMMU */ + +/* load data with address translation (if applicable) + and endianness conversion */ +static void tcg_out_qemu_ld(TCGContext* s, const TCGArg* args, int opc) +{ + int addr_reg, data_reg, mem_index; + int arg0 = TCG_REG_R2; + uint16_t *label2_ptr; + + data_reg = *args++; + addr_reg = *args++; + mem_index = *args; + + dprintf("tcg_out_qemu_ld opc %d data_reg %d addr_reg %d mem_index %d\n" + opc, data_reg, addr_reg, mem_index); + + tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index, + opc, &label2_ptr, 0); + + switch (opc) { + case LD_UINT8: + tcg_out_insn(s, RXY, LLGC, data_reg, arg0, 0, 0); + break; + case LD_INT8: + tcg_out_insn(s, RXY, LGB, data_reg, arg0, 0, 0); + break; + case LD_UINT16: +#ifdef TARGET_WORDS_BIGENDIAN + tcg_out_insn(s, RXY, LLGH, data_reg, arg0, 0, 0); +#else + /* swapped unsigned halfword load with upper bits zeroed */ + tcg_out_insn(s, RXY, LRVH, data_reg, arg0, 0, 0); + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R13, 0xffffL); + tcg_out_insn(s, RRE, NGR, data_reg, 13); +#endif + break; + case LD_INT16: +#ifdef TARGET_WORDS_BIGENDIAN + 
tcg_out_insn(s, RXY, LGH, data_reg, arg0, 0, 0); +#else + /* swapped sign-extended halfword load */ + tcg_out_insn(s, RXY, LRVH, data_reg, arg0, 0, 0); + tcg_out_insn(s, RSY, SLLG, data_reg, data_reg, TCG_REG_NONE, 48); + tcg_out_insn(s, RSY, SRAG, data_reg, data_reg, TCG_REG_NONE, 48); +#endif + break; + case LD_UINT32: +#ifdef TARGET_WORDS_BIGENDIAN + tcg_out_insn(s, RXY, LLGF, data_reg, arg0, 0, 0); +#else + /* swapped unsigned int load with upper bits zeroed */ + tcg_out_insn(s, RXY, LRV, data_reg, arg0, 0, 0); + tcg_out_insn(s, RRE, LLGFR, data_reg, data_reg); +#endif + break; + case LD_INT32: +#ifdef TARGET_WORDS_BIGENDIAN + tcg_out_insn(s, RXY, LGF, data_reg, arg0, 0, 0); +#else + /* swapped sign-extended int load */ + tcg_out_insn(s, RXY, LRV, data_reg, arg0, 0, 0); + tcg_out_insn(s, RRE, LGFR, data_reg, data_reg); +#endif + break; + case LD_UINT64: +#ifdef TARGET_WORDS_BIGENDIAN + tcg_out_insn(s, RXY, LG, data_reg, arg0, 0, 0); +#else + tcg_out_insn(s, RXY, LRVG, data_reg, arg0, 0, 0); +#endif + break; + default: + tcg_abort(); + } + + tcg_finish_qemu_ldst(s, label2_ptr); +} + +static void tcg_out_qemu_st(TCGContext* s, const TCGArg* args, int opc) +{ + int addr_reg, data_reg, mem_index; + uint16_t *label2_ptr; + int arg0 = TCG_REG_R2; + + data_reg = *args++; + addr_reg = *args++; + mem_index = *args; + + dprintf("tcg_out_qemu_st opc %d data_reg %d addr_reg %d mem_index %d\n" + opc, data_reg, addr_reg, mem_index); + + tcg_prepare_qemu_ldst(s, data_reg, addr_reg, mem_index, + opc, &label2_ptr, 1); + + switch (opc) { + case LD_UINT8: + tcg_out_insn(s, RX, STC, data_reg, arg0, 0, 0); + break; + case LD_UINT16: +#ifdef TARGET_WORDS_BIGENDIAN + tcg_out_insn(s, RX, STH, data_reg, arg0, 0, 0); +#else + tcg_out_insn(s, RXY, STRVH, data_reg, arg0, 0, 0); +#endif + break; + case LD_UINT32: +#ifdef TARGET_WORDS_BIGENDIAN + tcg_out_insn(s, RX, ST, data_reg, arg0, 0, 0); +#else + tcg_out_insn(s, RXY, STRV, data_reg, arg0, 0, 0); +#endif + break; + case LD_UINT64: 
+#ifdef TARGET_WORDS_BIGENDIAN + tcg_out_insn(s, RXY, STG, data_reg, arg0, 0, 0); +#else + tcg_out_insn(s, RXY, STRVG, data_reg, arg0, 0, 0); +#endif + break; + default: + tcg_abort(); + } + + tcg_finish_qemu_ldst(s, label2_ptr); } static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { - tcg_abort(); + S390Opcode op; + + switch (opc) { + case INDEX_op_exit_tb: + /* return value */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, args[0]); + tgen_gotoi(s, S390_CC_ALWAYS, (unsigned long)tb_ret_addr); + break; + + case INDEX_op_goto_tb: + if (s->tb_jmp_offset) { + tcg_abort(); + } else { + tcg_target_long off = ((tcg_target_long)(s->tb_next + args[0]) - + (tcg_target_long)s->code_ptr) >> 1; + if (off == (int32_t)off) { + /* load address relative to PC */ + tcg_out_insn(s, RIL, LARL, TCG_REG_R13, off); + } else { + /* too far for larl */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R13, + (tcg_target_long)(s->tb_next + args[0])); + } + /* load address stored at s->tb_next + args[0] */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R13, TCG_REG_R13, 0); + /* and go there */ + tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R13); + } + s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf; + break; + + case INDEX_op_call: + if (const_args[0]) { + tgen_calli(s, args[0]); + } else { + tcg_out_insn(s, RR, BASR, TCG_REG_R14, args[0]); + } + break; + + case INDEX_op_jmp: + /* XXX */ + tcg_abort(); + break; + + case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: + /* ??? LLC (RXY format) is only present with the extended-immediate + facility, whereas LLGC is always present. */ + tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]); + break; + + case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: + /* ??? LB is no smaller than LGB, so no point to using it. */ + tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]); + break; + + case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: + /* ??? 
LLH (RXY format) is only present with the extended-immediate + facility, whereas LLGH is always present. */ + tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]); + break; + + case INDEX_op_ld16s_i32: + tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]); + break; + case INDEX_op_ld16s_i64: + tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]); + break; + + case INDEX_op_ld_i32: + tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + case INDEX_op_ld32u_i64: + tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]); + break; + case INDEX_op_ld32s_i64: + tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]); + break; + + case INDEX_op_ld_i64: + tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + + case INDEX_op_st8_i32: + case INDEX_op_st8_i64: + tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1], + TCG_REG_NONE, args[2]); + break; + + case INDEX_op_st16_i32: + case INDEX_op_st16_i64: + tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1], + TCG_REG_NONE, args[2]); + break; + + case INDEX_op_st_i32: + case INDEX_op_st32_i64: + tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]); + break; + + case INDEX_op_st_i64: + tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]); + break; + + case INDEX_op_mov_i32: + /* XXX */ + tcg_abort(); + break; + + case INDEX_op_movi_i32: + /* XXX */ + tcg_abort(); + break; + + case INDEX_op_add_i32: + if (const_args[2]) { + tcg_out_insn(s, RI, AHI, args[0], args[2]); + } else { + tcg_out_insn(s, RR, AR, args[0], args[2]); + } + break; + + case INDEX_op_add_i64: + tcg_out_insn(s, RRE, AGR, args[0], args[2]); + break; + + case INDEX_op_sub_i32: + tcg_out_insn(s, RR, SR, args[0], args[2]); + break; + + case INDEX_op_sub_i64: + tcg_out_insn(s, RRE, SGR, args[0], args[2]); + break; + + case INDEX_op_and_i32: + tcg_out_insn(s, RR, NR, args[0], args[2]); + break; + case INDEX_op_or_i32: + tcg_out_insn(s, RR, OR, args[0], args[2]); + 
break; + case INDEX_op_xor_i32: + tcg_out_insn(s, RR, XR, args[0], args[2]); + break; + + case INDEX_op_and_i64: + tcg_out_insn(s, RRE, NGR, args[0], args[2]); + break; + case INDEX_op_or_i64: + tcg_out_insn(s, RRE, OGR, args[0], args[2]); + break; + case INDEX_op_xor_i64: + tcg_out_insn(s, RRE, XGR, args[0], args[2]); + break; + + case INDEX_op_neg_i32: + /* FIXME: optimize args[0] != args[1] case */ + tcg_out_insn(s, RR, LR, 13, args[1]); + tcg_out_movi(s, TCG_TYPE_I32, args[0], 0); + tcg_out_insn(s, RR, SR, args[0], 13); + break; + case INDEX_op_neg_i64: + /* FIXME: optimize args[0] != args[1] case */ + tcg_out_mov(s, TCG_REG_R13, args[1]); + tcg_out_movi(s, TCG_TYPE_I64, args[0], 0); + tcg_out_insn(s, RRE, SGR, args[0], TCG_REG_R13); + break; + + case INDEX_op_mul_i32: + tcg_out_insn(s, RRE, MSR, args[0], args[2]); + break; + case INDEX_op_mul_i64: + tcg_out_insn(s, RRE, MSGR, args[0], args[2]); + break; + + case INDEX_op_div2_i32: + tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]); + break; + case INDEX_op_divu2_i32: + tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]); + break; + + case INDEX_op_div2_i64: + /* ??? We get an unnecessary sign-extension of the dividend + into R3 with this definition, but as we do in fact always + produce both quotient and remainder using INDEX_op_div_i64 + instead requires jumping through even more hoops. 
*/ + tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]); + break; + case INDEX_op_divu2_i64: + tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]); + break; + + case INDEX_op_shl_i32: + op = RS_SLL; + do_shift32: + if (const_args[2]) { + tcg_out_sh32(s, op, args[0], TCG_REG_NONE, args[2]); + } else { + tcg_out_sh32(s, op, args[0], args[2], 0); + } + break; + case INDEX_op_shr_i32: + op = RS_SRL; + goto do_shift32; + case INDEX_op_sar_i32: + op = RS_SRA; + goto do_shift32; + + case INDEX_op_shl_i64: + op = RSY_SLLG; + do_shift64: + if (const_args[2]) { + tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]); + } else { + tcg_out_sh64(s, op, args[0], args[1], args[2], 0); + } + break; + case INDEX_op_shr_i64: + op = RSY_SRLG; + goto do_shift64; + case INDEX_op_sar_i64: + op = RSY_SRAG; + goto do_shift64; + + case INDEX_op_br: + tgen_branch(s, S390_CC_ALWAYS, args[0]); + break; + + case INDEX_op_brcond_i64: + tgen64_cmp(s, args[2], args[0], args[1]); + goto do_brcond; + case INDEX_op_brcond_i32: + tgen32_cmp(s, args[2], args[0], args[1]); + do_brcond: + tgen_branch(s, tcg_cond_to_s390_cond[args[2]], args[3]); + break; + + case INDEX_op_setcond_i32: + tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], args[2]); + break; + case INDEX_op_setcond_i64: + tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1], args[2]); + break; + + case INDEX_op_qemu_ld8u: + tcg_out_qemu_ld(s, args, LD_UINT8); + break; + + case INDEX_op_qemu_ld8s: + tcg_out_qemu_ld(s, args, LD_INT8); + break; + + case INDEX_op_qemu_ld16u: + tcg_out_qemu_ld(s, args, LD_UINT16); + break; + + case INDEX_op_qemu_ld16s: + tcg_out_qemu_ld(s, args, LD_INT16); + break; + + case INDEX_op_qemu_ld32: + /* ??? Technically we can use a non-extending instruction. 
*/ + case INDEX_op_qemu_ld32u: + tcg_out_qemu_ld(s, args, LD_UINT32); + break; + + case INDEX_op_qemu_ld32s: + tcg_out_qemu_ld(s, args, LD_INT32); + break; + + case INDEX_op_qemu_ld64: + tcg_out_qemu_ld(s, args, LD_UINT64); + break; + + case INDEX_op_qemu_st8: + tcg_out_qemu_st(s, args, LD_UINT8); + break; + + case INDEX_op_qemu_st16: + tcg_out_qemu_st(s, args, LD_UINT16); + break; + + case INDEX_op_qemu_st32: + tcg_out_qemu_st(s, args, LD_UINT32); + break; + + case INDEX_op_qemu_st64: + tcg_out_qemu_st(s, args, LD_UINT64); + break; + + default: + fprintf(stderr,"unimplemented opc 0x%x\n",opc); + tcg_abort(); + } } +static const TCGTargetOpDef s390_op_defs[] = { + { INDEX_op_exit_tb, { } }, + { INDEX_op_goto_tb, { } }, + { INDEX_op_call, { "ri" } }, + { INDEX_op_jmp, { "ri" } }, + { INDEX_op_br, { } }, + + { INDEX_op_mov_i32, { "r", "r" } }, + { INDEX_op_movi_i32, { "r" } }, + + { INDEX_op_ld8u_i32, { "r", "r" } }, + { INDEX_op_ld8s_i32, { "r", "r" } }, + { INDEX_op_ld16u_i32, { "r", "r" } }, + { INDEX_op_ld16s_i32, { "r", "r" } }, + { INDEX_op_ld_i32, { "r", "r" } }, + { INDEX_op_st8_i32, { "r", "r" } }, + { INDEX_op_st16_i32, { "r", "r" } }, + { INDEX_op_st_i32, { "r", "r" } }, + + { INDEX_op_add_i32, { "r", "0", "rI" } }, + { INDEX_op_sub_i32, { "r", "0", "r" } }, + { INDEX_op_mul_i32, { "r", "0", "r" } }, + + { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } }, + { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } }, + + { INDEX_op_and_i32, { "r", "0", "r" } }, + { INDEX_op_or_i32, { "r", "0", "r" } }, + { INDEX_op_xor_i32, { "r", "0", "r" } }, + { INDEX_op_neg_i32, { "r", "r" } }, + + { INDEX_op_shl_i32, { "r", "0", "Ri" } }, + { INDEX_op_shr_i32, { "r", "0", "Ri" } }, + { INDEX_op_sar_i32, { "r", "0", "Ri" } }, + + { INDEX_op_brcond_i32, { "r", "r" } }, + { INDEX_op_setcond_i32, { "r", "r", "r" } }, + + { INDEX_op_qemu_ld8u, { "r", "L" } }, + { INDEX_op_qemu_ld8s, { "r", "L" } }, + { INDEX_op_qemu_ld16u, { "r", "L" } }, + { INDEX_op_qemu_ld16s, { "r", "L" } }, + 
{ INDEX_op_qemu_ld32u, { "r", "L" } }, + { INDEX_op_qemu_ld32s, { "r", "L" } }, + { INDEX_op_qemu_ld32, { "r", "L" } }, + { INDEX_op_qemu_ld64, { "r", "L" } }, + + { INDEX_op_qemu_st8, { "L", "L" } }, + { INDEX_op_qemu_st16, { "L", "L" } }, + { INDEX_op_qemu_st32, { "L", "L" } }, + { INDEX_op_qemu_st64, { "L", "L" } }, + +#if defined(__s390x__) + { INDEX_op_mov_i64, { "r", "r" } }, + { INDEX_op_movi_i64, { "r" } }, + + { INDEX_op_ld8u_i64, { "r", "r" } }, + { INDEX_op_ld8s_i64, { "r", "r" } }, + { INDEX_op_ld16u_i64, { "r", "r" } }, + { INDEX_op_ld16s_i64, { "r", "r" } }, + { INDEX_op_ld32u_i64, { "r", "r" } }, + { INDEX_op_ld32s_i64, { "r", "r" } }, + { INDEX_op_ld_i64, { "r", "r" } }, + + { INDEX_op_st8_i64, { "r", "r" } }, + { INDEX_op_st16_i64, { "r", "r" } }, + { INDEX_op_st32_i64, { "r", "r" } }, + { INDEX_op_st_i64, { "r", "r" } }, + + { INDEX_op_add_i64, { "r", "0", "r" } }, + { INDEX_op_sub_i64, { "r", "0", "r" } }, + { INDEX_op_mul_i64, { "r", "0", "r" } }, + + { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } }, + { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } }, + + { INDEX_op_and_i64, { "r", "0", "r" } }, + { INDEX_op_or_i64, { "r", "0", "r" } }, + { INDEX_op_xor_i64, { "r", "0", "r" } }, + { INDEX_op_neg_i64, { "r", "r" } }, + + { INDEX_op_shl_i64, { "r", "r", "Ri" } }, + { INDEX_op_shr_i64, { "r", "r", "Ri" } }, + { INDEX_op_sar_i64, { "r", "r", "Ri" } }, + + { INDEX_op_brcond_i64, { "r", "r" } }, + { INDEX_op_setcond_i64, { "r", "r", "r" } }, +#endif + + { -1 }, +}; + void tcg_target_init(TCGContext *s) { - /* gets called with KVM */ +#if !defined(CONFIG_USER_ONLY) + /* fail safe */ + if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) { + tcg_abort(); + } +#endif + + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); + tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); + tcg_regset_set32(tcg_target_call_clobber_regs, 0, + (1 << TCG_REG_R0) | + (1 << TCG_REG_R1) | + (1 << TCG_REG_R2) | + (1 << TCG_REG_R3) | 
+ (1 << TCG_REG_R4) | + (1 << TCG_REG_R5) | + (1 << TCG_REG_R14)); /* link register */ + + tcg_regset_clear(s->reserved_regs); + /* frequently used as a temporary */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); + /* another temporary */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R12); + /* XXX many insns can't be used with R0, so we better avoid it for now */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); + /* The stack pointer. */ + tcg_regset_set_reg(s->reserved_regs, TCG_REG_R15); + + tcg_add_target_add_op_defs(s390_op_defs); } void tcg_target_qemu_prologue(TCGContext *s) { - /* gets called with KVM */ -} + /* stmg %r6,%r15,48(%r15) (save registers) */ + tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48); -static inline void tcg_out_mov(TCGContext *s, int ret, int arg) -{ - tcg_abort(); + /* aghi %r15,-160 (stack frame) */ + tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -160); + + /* br %r2 (go to TB) */ + tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R2); + + tb_ret_addr = s->code_ptr; + + /* lmg %r6,%r15,208(%r15) (restore registers) */ + tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 208); + + /* br %r14 (return) */ + tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14); } static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val) diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 8c19262..26dafae 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -26,7 +26,7 @@ #define TCG_TARGET_REG_BITS 64 #define TCG_TARGET_WORDS_BIGENDIAN -enum { +typedef enum TCGReg { TCG_REG_R0 = 0, TCG_REG_R1, TCG_REG_R2, @@ -43,11 +43,12 @@ enum { TCG_REG_R13, TCG_REG_R14, TCG_REG_R15 -}; +} TCGReg; + #define TCG_TARGET_NB_REGS 16 /* optional instructions */ -// #define TCG_TARGET_HAS_div_i32 +#define TCG_TARGET_HAS_div2_i32 // #define TCG_TARGET_HAS_rot_i32 // #define TCG_TARGET_HAS_ext8s_i32 // #define TCG_TARGET_HAS_ext16s_i32 @@ -56,14 +57,14 @@ enum { // #define 
TCG_TARGET_HAS_bswap16_i32 // #define TCG_TARGET_HAS_bswap32_i32 // #define TCG_TARGET_HAS_not_i32 -// #define TCG_TARGET_HAS_neg_i32 +#define TCG_TARGET_HAS_neg_i32 // #define TCG_TARGET_HAS_andc_i32 // #define TCG_TARGET_HAS_orc_i32 // #define TCG_TARGET_HAS_eqv_i32 // #define TCG_TARGET_HAS_nand_i32 // #define TCG_TARGET_HAS_nor_i32 -// #define TCG_TARGET_HAS_div_i64 +#define TCG_TARGET_HAS_div2_i64 // #define TCG_TARGET_HAS_rot_i64 // #define TCG_TARGET_HAS_ext8s_i64 // #define TCG_TARGET_HAS_ext16s_i64 @@ -75,7 +76,7 @@ enum { // #define TCG_TARGET_HAS_bswap32_i64 // #define TCG_TARGET_HAS_bswap64_i64 // #define TCG_TARGET_HAS_not_i64 -// #define TCG_TARGET_HAS_neg_i64 +#define TCG_TARGET_HAS_neg_i64 // #define TCG_TARGET_HAS_andc_i64 // #define TCG_TARGET_HAS_orc_i64 // #define TCG_TARGET_HAS_eqv_i64 -- 1.7.0.1