The difference is that the INSN_* values have been pre-shifted into place. This should result in less runtime shifting of constants.
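
As a rough, self-contained sketch of the idea (the emit_add_old/emit_add_new helpers, the *_DEMO names and the main driver below are hypothetical, not part of the patch): with the old aarch64_arith_opc values the opcode byte still has to be shifted into bits [31:24] at emit time, whereas a pre-shifted INSN_* constant is simply OR'd with the extension bit and the register fields.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Old style: a small opcode value that must be shifted into place
   (bits [31:24]) every time an instruction is emitted. */
enum { ARITH_ADD_OLD = 0x0b };

static uint32_t emit_add_old(int ext, unsigned rd, unsigned rn, unsigned rm)
{
    uint32_t base = (uint32_t)(ext ? (0x80 | ARITH_ADD_OLD) : ARITH_ADD_OLD) << 24;
    return base | rm << 16 | rn << 5 | rd;
}

/* New style: the constant already carries the opcode in its final
   bit positions, so emission is a plain OR with the operand fields. */
static const uint32_t INSN_ADD_DEMO = 0x0b000000u;
static const uint32_t E64_DEMO      = 0x80000000u;

static uint32_t emit_add_new(uint32_t ext, unsigned rd, unsigned rn, unsigned rm)
{
    return INSN_ADD_DEMO | ext | rm << 16 | rn << 5 | rd;
}

int main(void)
{
    /* Both forms encode "add x0, x1, x2" identically: 0x8b020020. */
    uint32_t insn_old = emit_add_old(1, 0, 1, 2);
    uint32_t insn_new = emit_add_new(E64_DEMO, 0, 1, 2);
    assert(insn_old == insn_new);
    printf("0x%08x\n", insn_new);
    return 0;
}

Both paths produce the same instruction word; the second simply folds the shift into the compile-time constant.
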
The patch could be split into smaller pieces for clarity... Signed-off-by: Richard Henderson <r...@twiddle.net> --- tcg/aarch64/tcg-target.c | 518 ++++++++++++++++++++++++----------------------- 1 file changed, 269 insertions(+), 249 deletions(-) diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index 76595b4..3640486 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -212,7 +212,76 @@ static inline int tcg_target_const_match(tcg_target_long val, return 0; } -enum aarch64_cond_code { +typedef enum { + /* Logical immediate instructions */ + INSN_ANDI = 0x12000000, + INSN_ORRI = 0x32000000, + INSN_EORI = 0x52000000, + + /* Logical shifted register instructions */ + INSN_AND = 0x0a000000, + INSN_BIC = 0x0a200000, + INSN_ORR = 0x2a000000, + INSN_ORN = 0x2a200000, + INSN_EOR = 0x4a000000, + INSN_EON = 0x4a200000, + + /* Move wide immediate instructions */ + INSN_MOVN = 0x12800000, + INSN_MOVZ = 0x52800000, + INSN_MOVK = 0x72800000, + + /* Add/subtract immediate instructions */ + INSN_ADDI = 0x11000000, + INSN_ADDSI = 0x31000000, + INSN_SUBI = 0x51000000, + INSN_SUBSI = 0x71000000, + + /* Add/subtract shifted register instructions */ + INSN_ADD = 0x0b000000, + INSN_SUB = 0x4b000000, + INSN_SUBS = 0x6b000000, + + /* Data-processing (1 source) instructions */ + INSN_REV16 = 0x5ac00400, + INSN_REVx = 0xdac00c00, + INSN_REVw = 0x5ac00800, + + /* Data-processing (2 source) instructions */ + INSN_LSLV = 0x1ac02000, + INSN_LSRV = 0x1ac02400, + INSN_ASRV = 0x1ac02800, + INSN_RORV = 0x1ac02c00, + INSN_MUL = 0x1b007c00, /* MADD alias with Ra = xzr */ + + /* Bitfield instructions */ + INSN_SBFM = 0x13000000, + INSN_UBFM = 0x53000000, + INSN_EXTR = 0x13800000, + + /* Conditional select instructions */ + INSN_CSINC = 0x1a800400, + + /* Branch instructions */ + INSN_B = 0x14000000, + INSN_BL = 0x94000000, + INSN_BR = 0xd61f0000, + INSN_BLR = 0xd63f0000, + INSN_RET = 0xd65f0000, + INSN_B_C = 0x54000000, + + /* System instructions */ + INSN_NOP = 0xd503201f, +} AArch64Insn; + +typedef enum { + E32 = 0, + E64 = 0x80000000u, +} AArch64Ext; + +#define EXT(cond) (cond ? E64 : E32) + +typedef enum { COND_EQ = 0x0, COND_NE = 0x1, COND_CS = 0x2, /* Unsigned greater or equal */ @@ -231,9 +300,9 @@ enum aarch64_cond_code { COND_LE = 0xd, COND_AL = 0xe, COND_NV = 0xf, /* behaves like COND_AL here */ -}; +} AArch64Cond; -static const enum aarch64_cond_code tcg_cond_to_aarch64[] = { +static const AArch64Cond tcg_cond_to_aarch64[] = { [TCG_COND_EQ] = COND_EQ, [TCG_COND_NE] = COND_NE, [TCG_COND_LT] = COND_LT, @@ -261,31 +330,6 @@ enum aarch64_ldst_op_type { /* type of operation */ LDST_LD_S_W = 0xc, /* load and sign-extend into Wt */ }; -enum aarch64_arith_opc { - ARITH_AND = 0x0a, - ARITH_ADD = 0x0b, - ARITH_ADDI = 0x11, - ARITH_ANDI = 0x12, - ARITH_OR = 0x2a, - ARITH_ADDS = 0x2b, - ARITH_ADDSI = 0x31, - ARITH_ORI = 0x32, - ARITH_XOR = 0x4a, - ARITH_SUB = 0x4b, - ARITH_SUBI = 0x51, - ARITH_XORI = 0x52, - ARITH_ANDS = 0x6a, - ARITH_SUBS = 0x6b, - ARITH_SUBSI = 0x71, -}; - -enum aarch64_srr_opc { - SRR_SHL = 0x0, - SRR_SHR = 0x4, - SRR_SAR = 0x8, - SRR_ROR = 0xc -}; - static inline enum aarch64_ldst_op_data aarch64_ldst_get_data(TCGOpcode tcg_op) { @@ -395,46 +439,78 @@ static inline void tcg_out_ldst_12(TCGContext *s, | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd); } -static inline void tcg_out_movr(TCGContext *s, int ext, TCGReg rd, TCGReg src) +/* Suitable for add/sub and logical shifted register instructions. 
*/ +static inline void tcg_out_arith(TCGContext *s, AArch64Insn insn, + AArch64Ext ext, TCGReg rd, TCGReg rn, + TCGReg rm, int shift_imm) { - /* register to register move using MOV (shifted register with no shift) */ - /* using MOV 0x2a0003e0 | (shift).. */ - unsigned int base = ext ? 0xaa0003e0 : 0x2a0003e0; - tcg_out32(s, base | src << 16 | rd); + unsigned int shift; + if (shift_imm == 0) { + shift = 0; + } else if (shift_imm > 0) { + shift = shift_imm << 10 | 1 << 22; + } else /* (shift_imm < 0) */ { + shift = (-shift_imm) << 10; + } + tcg_out32(s, insn | ext | shift | rm << 16 | rn << 5 | rd); } -static inline void tcg_out_movi_aux(TCGContext *s, - TCGReg rd, uint64_t value) +static inline void tcg_out_aimm(TCGContext *s, AArch64Insn insn, + AArch64Ext ext, TCGReg rd, TCGReg rn, + uint64_t aimm) { - uint32_t half, base, shift, movk = 0; - /* construct halfwords of the immediate with MOVZ/MOVK with LSL */ - /* using MOVZ 0x52800000 | extended reg.. */ - base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000; - /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the - first MOVZ with the half-word immediate skipping the zeros, with a shift - (LSL) equal to this number. Then morph all next instructions into MOVKs. + if (aimm > 0xfff) { + assert((aimm & 0xfff) == 0); + aimm >>= 12; + assert(aimm <= 0xfff); + aimm |= 1 << 12; /* apply LSL 12 */ + } + tcg_out32(s, insn | ext | aimm << 10 | rn << 5 | rd); +} + +static void tcg_out_limm(TCGContext *s, AArch64Insn insn, AArch64Ext ext, + TCGReg rd, TCGReg rn, tcg_target_long val) +{ + int index = (ext ? find_bitmask64(val) : find_bitmask32(val)); + assert(index >= 0); + tcg_out32(s, insn | ext | bitmask_enc[index] << 10 | rn << 5 | rd); +} + +/* Register-register move, assuming XSP not XZR. */ +static inline void tcg_out_movr(TCGContext *s, AArch64Ext ext, + TCGReg dest, TCGReg src) +{ + tcg_out_aimm(s, INSN_ADDI, ext, dest, src, 0); +} + +static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd, + tcg_target_long value) +{ + AArch64Insn insn = INSN_MOVZ; + + if (type == TCG_TYPE_I32) { + value = (uint32_t)value; + } + + /* Construct halfwords of the immediate with MOVZ/MOVK with LSL. + Count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the + first MOVZ with the half-word immediate skipping the zeros, with + a shift (LSL) equal to this number. Then all other insns are MOVKs. Zero the processed half-word in the value, continue until empty. We build the final result 16bits at a time with up to 4 instructions, but do not emit instructions for 16bit zero holes. 
*/ do { - shift = ctz64(value) & (63 & -16); - half = (value >> shift) & 0xffff; - tcg_out32(s, base | movk | shift << 17 | half << 5 | rd); - movk = 0x20000000; /* morph next MOVZs into MOVKs */ + unsigned shift = ctz64(value) & (63 & -16); + unsigned half = (value >> shift) & 0xffff; + AArch64Ext ext = EXT(shift >= 32); + + tcg_out32(s, insn | ext | shift << 17 | half << 5 | rd); + + insn = INSN_MOVK; value &= ~(0xffffUL << shift); } while (value); } -static inline void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg rd, tcg_target_long value) -{ - if (type == TCG_TYPE_I64) { - tcg_out_movi_aux(s, rd, value); - } else { - tcg_out_movi_aux(s, rd, value & 0xffffffff); - } -} - static inline void tcg_out_ldst_r(TCGContext *s, enum aarch64_ldst_op_data op_data, enum aarch64_ldst_op_type op_type, @@ -475,19 +551,11 @@ static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data, tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP); } -/* mov alias implemented with add immediate, useful to move to/from SP */ -static inline void tcg_out_movr_sp(TCGContext *s, int ext, TCGReg rd, TCGReg rn) -{ - /* using ADD 0x11000000 | (ext) | rn << 5 | rd */ - unsigned int base = ext ? 0x91000000 : 0x11000000; - tcg_out32(s, base | rn << 5 | rd); -} - static inline void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { if (ret != arg) { - tcg_out_movr(s, type == TCG_TYPE_I64, ret, arg); + tcg_out_movr(s, EXT(type == TCG_TYPE_I64), ret, arg); } } @@ -505,90 +573,35 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, arg, arg1, arg2); } -static inline void tcg_out_arith(TCGContext *s, enum aarch64_arith_opc opc, - int ext, TCGReg rd, TCGReg rn, TCGReg rm, - int shift_imm) -{ - /* Using shifted register arithmetic operations */ - /* if extended register operation (64bit) just OR with 0x80 << 24 */ - unsigned int shift, base = ext ? (0x80 | opc) << 24 : opc << 24; - if (shift_imm == 0) { - shift = 0; - } else if (shift_imm > 0) { - shift = shift_imm << 10 | 1 << 22; - } else /* (shift_imm < 0) */ { - shift = (-shift_imm) << 10; - } - tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd); -} - -static inline void tcg_out_aimm(TCGContext *s, enum aarch64_arith_opc opc, - int ext, TCGReg rd, TCGReg rn, uint64_t aimm) -{ - unsigned int base = (ext ? 0x80 | opc : opc) << 24; - - if (aimm > 0xfff) { - assert((aimm & 0xfff) == 0); - aimm >>= 12; - base |= 1 << 22; /* apply LSL 12 */ - assert(aimm <= 0xfff); - } - tcg_out32(s, base | (aimm << 10) | (rn << 5) | rd); -} - -static void tcg_out_limm(TCGContext *s, enum aarch64_arith_opc opc, int ext, - TCGReg rd, TCGReg rn, tcg_target_long val) -{ - int index = (ext ? find_bitmask64(val) : find_bitmask32(val)); - unsigned base = (ext ? 0x80 | opc : opc) << 24; - - assert(index >= 0); - tcg_out32(s, base | bitmask_enc[index] << 10 | rn << 5 | rd); -} - -static inline void tcg_out_mul(TCGContext *s, int ext, - TCGReg rd, TCGReg rn, TCGReg rm) +static inline void tcg_out_data2(TCGContext *s, AArch64Insn insn, + AArch64Ext ext, TCGReg rd, + TCGReg rn, TCGReg rm) { - /* Using MADD 0x1b000000 with Ra = wzr alias MUL 0x1b007c00 */ - unsigned int base = ext ? 
0x9b007c00 : 0x1b007c00; - tcg_out32(s, base | rm << 16 | rn << 5 | rd); + tcg_out32(s, insn | ext | rm << 16 | rn << 5 | rd); } -static inline void tcg_out_shiftrot_reg(TCGContext *s, - enum aarch64_srr_opc opc, int ext, - TCGReg rd, TCGReg rn, TCGReg rm) +static inline void tcg_out_ubfm(TCGContext *s, AArch64Ext ext, TCGReg rd, + TCGReg rn, unsigned int a, unsigned int b) { - /* using 2-source data processing instructions 0x1ac02000 */ - unsigned int base = ext ? 0x9ac02000 : 0x1ac02000; - tcg_out32(s, base | rm << 16 | opc << 8 | rn << 5 | rd); -} - -static inline void tcg_out_ubfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn, - unsigned int a, unsigned int b) -{ - /* Using UBFM 0x53000000 Wd, Wn, a, b */ - unsigned int base = ext ? 0xd3400000 : 0x53000000; + unsigned int base = INSN_UBFM | (ext ? 0x80400000 : 0); tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd); } -static inline void tcg_out_sbfm(TCGContext *s, int ext, TCGReg rd, TCGReg rn, - unsigned int a, unsigned int b) +static inline void tcg_out_sbfm(TCGContext *s, AArch64Ext ext, TCGReg rd, + TCGReg rn, unsigned int a, unsigned int b) { - /* Using SBFM 0x13000000 Wd, Wn, a, b */ - unsigned int base = ext ? 0x93400000 : 0x13000000; + unsigned int base = INSN_SBFM | (ext ? 0x80400000 : 0); tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd); } -static inline void tcg_out_extr(TCGContext *s, int ext, TCGReg rd, +static inline void tcg_out_extr(TCGContext *s, AArch64Ext ext, TCGReg rd, TCGReg rn, TCGReg rm, unsigned int a) { - /* Using EXTR 0x13800000 Wd, Wn, Wm, a */ - unsigned int base = ext ? 0x93c00000 : 0x13800000; - tcg_out32(s, base | rm << 16 | a << 10 | rn << 5 | rd); + tcg_out32(s, INSN_EXTR | ext | rm << 16 | a << 10 | rn << 5 | rd); } -static inline void tcg_out_shl(TCGContext *s, int ext, - TCGReg rd, TCGReg rn, unsigned int m) +static inline void tcg_out_shli(TCGContext *s, AArch64Ext ext, TCGReg rd, + TCGReg rn, unsigned int m) { int bits, max; bits = ext ? 64 : 32; @@ -596,29 +609,29 @@ static inline void tcg_out_shl(TCGContext *s, int ext, tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max)); } -static inline void tcg_out_shr(TCGContext *s, int ext, - TCGReg rd, TCGReg rn, unsigned int m) +static inline void tcg_out_shri(TCGContext *s, AArch64Ext ext, TCGReg rd, + TCGReg rn, unsigned int m) { int max = ext ? 63 : 31; tcg_out_ubfm(s, ext, rd, rn, m & max, max); } -static inline void tcg_out_sar(TCGContext *s, int ext, - TCGReg rd, TCGReg rn, unsigned int m) +static inline void tcg_out_sari(TCGContext *s, AArch64Ext ext, TCGReg rd, + TCGReg rn, unsigned int m) { int max = ext ? 63 : 31; tcg_out_sbfm(s, ext, rd, rn, m & max, max); } -static inline void tcg_out_rotr(TCGContext *s, int ext, - TCGReg rd, TCGReg rn, unsigned int m) +static inline void tcg_out_rotri(TCGContext *s, AArch64Ext ext, TCGReg rd, + TCGReg rn, unsigned int m) { int max = ext ? 63 : 31; tcg_out_extr(s, ext, rd, rn, rn, m & max); } -static inline void tcg_out_rotl(TCGContext *s, int ext, - TCGReg rd, TCGReg rn, unsigned int m) +static inline void tcg_out_rotli(TCGContext *s, AArch64Ext ext, TCGReg rd, + TCGReg rn, unsigned int m) { int bits, max; bits = ext ? 
64 : 32; @@ -626,29 +639,31 @@ static inline void tcg_out_rotl(TCGContext *s, int ext, tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max)); } -static void tcg_out_cmp(TCGContext *s, int ext, TCGReg a, +static void tcg_out_cmp(TCGContext *s, AArch64Ext ext, TCGReg a, tcg_target_long b, bool const_b) { if (const_b) { /* Using CMP alias SUBS xzr, Xn, const */ - enum aarch64_arith_opc opc = ARITH_SUBSI; + AArch64Insn insn = INSN_SUBSI; if (b < 0) { b = ~b; - opc = ARITH_ADDSI; + insn = INSN_ADDSI; } - tcg_out_aimm(s, opc, ext, TCG_REG_XZR, a, b); + tcg_out_aimm(s, insn, ext, TCG_REG_XZR, a, b); } else { /* Using CMP alias SUBS wzr, Wn, Wm */ - tcg_out_arith(s, ARITH_SUBS, ext, TCG_REG_XZR, a, b, 0); + tcg_out_arith(s, INSN_SUBS, ext, TCG_REG_XZR, a, b, 0); } } -static inline void tcg_out_cset(TCGContext *s, int ext, TCGReg rd, TCGCond c) +static inline void tcg_out_cset(TCGContext *s, AArch64Ext ext, + TCGReg rd, TCGCond c) { /* Using CSET alias of CSINC 0x1a800400 Xd, XZR, XZR, invert(cond) */ - unsigned int base = ext ? 0x9a9f07e0 : 0x1a9f07e0; - tcg_out32(s, base | tcg_cond_to_aarch64[tcg_invert_cond(c)] << 12 | rd); + unsigned int base = INSN_CSINC | TCG_REG_XZR << 16 | TCG_REG_XZR << 5; + unsigned int cond = tcg_cond_to_aarch64[tcg_invert_cond(c)]; + tcg_out32(s, base | ext | cond << 12 | rd); } static inline void tcg_out_goto(TCGContext *s, tcg_target_long target) @@ -661,7 +676,7 @@ static inline void tcg_out_goto(TCGContext *s, tcg_target_long target) tcg_abort(); } - tcg_out32(s, 0x14000000 | (offset & 0x03ffffff)); + tcg_out32(s, INSN_B | (offset & 0x03ffffff)); } static inline void tcg_out_goto_noaddr(TCGContext *s) @@ -672,7 +687,7 @@ static inline void tcg_out_goto_noaddr(TCGContext *s) Mask away possible garbage in the high bits for the first translation, while keeping the offset bits for retranslation. */ uint32_t insn; - insn = (tcg_in32(s) & 0x03ffffff) | 0x14000000; + insn = (tcg_in32(s) & 0x03ffffff) | INSN_B; tcg_out32(s, insn); } @@ -681,7 +696,7 @@ static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c) /* see comments in tcg_out_goto_noaddr */ uint32_t insn; insn = tcg_in32(s) & (0x07ffff << 5); - insn |= 0x54000000 | tcg_cond_to_aarch64[c]; + insn |= INSN_B_C | tcg_cond_to_aarch64[c]; tcg_out32(s, insn); } @@ -697,17 +712,17 @@ static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c, } offset &= 0x7ffff; - tcg_out32(s, 0x54000000 | tcg_cond_to_aarch64[c] | offset << 5); + tcg_out32(s, INSN_B_C | tcg_cond_to_aarch64[c] | offset << 5); } static inline void tcg_out_callr(TCGContext *s, TCGReg reg) { - tcg_out32(s, 0xd63f0000 | reg << 5); + tcg_out32(s, INSN_BLR | reg << 5); } static inline void tcg_out_gotor(TCGContext *s, TCGReg reg) { - tcg_out32(s, 0xd61f0000 | reg << 5); + tcg_out32(s, INSN_BR | reg << 5); } static inline void tcg_out_call(TCGContext *s, tcg_target_long target) @@ -720,7 +735,7 @@ static inline void tcg_out_call(TCGContext *s, tcg_target_long target) tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target); tcg_out_callr(s, TCG_REG_TMP); } else { - tcg_out32(s, 0x94000000 | (offset & 0x03ffffff)); + tcg_out32(s, INSN_BL | (offset & 0x03ffffff)); } } @@ -741,7 +756,7 @@ aarch64_limm(unsigned int m, unsigned int r) to test a 32bit reg against 0xff000000, pass M = 8, R = 8. to test a 32bit reg against 0xff0000ff, pass M = 16, R = 8. 
*/ -static inline void tcg_out_tst(TCGContext *s, int ext, TCGReg rn, +static inline void tcg_out_tst(TCGContext *s, AArch64Ext ext, TCGReg rn, unsigned int m, unsigned int r) { /* using TST alias of ANDS XZR, Xn,#bimm64 0x7200001f */ @@ -750,8 +765,8 @@ static inline void tcg_out_tst(TCGContext *s, int ext, TCGReg rn, } /* and a register with a bit pattern, similarly to TST, no flags change */ -static inline void tcg_out_andi(TCGContext *s, int ext, TCGReg rd, TCGReg rn, - unsigned int m, unsigned int r) +static inline void tcg_out_andi(TCGContext *s, AArch64Ext ext, TCGReg rd, + TCGReg rn, unsigned int m, unsigned int r) { /* using AND 0x12000000 */ unsigned int base = ext ? 0x92400000 : 0x12000000; @@ -760,8 +775,7 @@ static inline void tcg_out_andi(TCGContext *s, int ext, TCGReg rd, TCGReg rn, static inline void tcg_out_ret(TCGContext *s) { - /* emit RET { LR } */ - tcg_out32(s, 0xd65f03c0); + tcg_out32(s, INSN_RET | TCG_REG_LR << 5); } void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) @@ -804,25 +818,23 @@ static inline void tcg_out_goto_label_cond(TCGContext *s, } } -static inline void tcg_out_rev(TCGContext *s, int ext, TCGReg rd, TCGReg rm) +static inline void tcg_out_rev(TCGContext *s, AArch64Ext ext, + TCGReg rd, TCGReg rm) { - /* using REV 0x5ac00800 */ - unsigned int base = ext ? 0xdac00c00 : 0x5ac00800; - tcg_out32(s, base | rm << 5 | rd); + AArch64Insn insn = ext ? INSN_REVx : INSN_REVw; + tcg_out32(s, insn | rm << 5 | rd); } -static inline void tcg_out_rev16(TCGContext *s, int ext, TCGReg rd, TCGReg rm) +static inline void tcg_out_rev16(TCGContext *s, AArch64Ext ext, + TCGReg rd, TCGReg rm) { - /* using REV16 0x5ac00400 */ - unsigned int base = ext ? 0xdac00400 : 0x5ac00400; - tcg_out32(s, base | rm << 5 | rd); + tcg_out32(s, INSN_REV16 | ext | rm << 5 | rd); } -static inline void tcg_out_sxt(TCGContext *s, int ext, int s_bits, +static inline void tcg_out_sxt(TCGContext *s, AArch64Ext ext, int s_bits, TCGReg rd, TCGReg rn) { - /* using ALIASes SXTB 0x13001c00, SXTH 0x13003c00, SXTW 0x93407c00 - of SBFM Xd, Xn, #0, #7|15|31 */ + /* using ALIASes SXTB, SXTH, SXTW of SBFM Xd, Xn, #0, #7|15|31 */ int bits = 8 * (1 << s_bits) - 1; tcg_out_sbfm(s, ext, rd, rn, 0, bits); } @@ -830,38 +842,37 @@ static inline void tcg_out_sxt(TCGContext *s, int ext, int s_bits, static inline void tcg_out_uxt(TCGContext *s, int s_bits, TCGReg rd, TCGReg rn) { - /* using ALIASes UXTB 0x53001c00, UXTH 0x53003c00 - of UBFM Wd, Wn, #0, #7|15 */ + /* using ALIASes UXTB, UXTH, of UBFM Wd, Wn, #0, #7|15 */ int bits = 8 * (1 << s_bits) - 1; tcg_out_ubfm(s, 0, rd, rn, 0, bits); } -static void tcg_out_addi(TCGContext *s, int ext, TCGReg rd, TCGReg rn, +static void tcg_out_addi(TCGContext *s, AArch64Ext ext, TCGReg rd, TCGReg rn, tcg_target_long aimm) { - enum aarch64_arith_opc opc = ARITH_ADDI; + AArch64Insn insn = INSN_ADDI; tcg_target_long lo, hi; if (aimm < 0) { aimm = -aimm; - opc = ARITH_SUBI; + insn = INSN_SUBI; } hi = aimm & 0xfff000; lo = aimm & 0xfff; assert(aimm == hi + lo); if (hi != 0) { - tcg_out_aimm(s, opc, ext, rd, rn, hi); + tcg_out_aimm(s, insn, ext, rd, rn, hi); rn = rd; } if (lo != 0 || rd != rn) { - tcg_out_aimm(s, opc, ext, rd, rn, lo); + tcg_out_aimm(s, insn, ext, rd, rn, lo); } } static inline void tcg_out_nop(TCGContext *s) { - tcg_out32(s, 0xd503201f); + tcg_out32(s, INSN_NOP); } #ifdef CONFIG_SOFTMMU @@ -887,17 +898,23 @@ static const void * const qemu_st_helpers[4] = { static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { + int opc = lb->opc; 
+ int s_bits = opc & 3; + reloc_pc19(lb->label_ptr[0], (tcg_target_long)s->code_ptr); - tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0); - tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg); + + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, lb->addrlo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index); tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, - (tcg_target_long)qemu_ld_helpers[lb->opc & 3]); + (tcg_target_long)qemu_ld_helpers[s_bits]); tcg_out_callr(s, TCG_REG_TMP); - if (lb->opc & 0x04) { - tcg_out_sxt(s, 1, lb->opc & 3, lb->datalo_reg, TCG_REG_X0); + + if (opc & 0x04) { + tcg_out_sxt(s, E64, s_bits, lb->datalo_reg, TCG_REG_X0); } else { - tcg_out_movr(s, 1, lb->datalo_reg, TCG_REG_X0); + tcg_out_mov(s, s_bits == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32, + lb->datalo_reg, TCG_REG_X0); } tcg_out_goto(s, (tcg_target_long)lb->raddr); @@ -905,14 +922,17 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) { + int s_bits = lb->opc; + reloc_pc19(lb->label_ptr[0], (tcg_target_long)s->code_ptr); - tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0); - tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg); - tcg_out_movr(s, 1, TCG_REG_X2, lb->datalo_reg); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0); + tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_X1, lb->addrlo_reg); + tcg_out_mov(s, s_bits == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32, + TCG_REG_X2, lb->datalo_reg); tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index); tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, - (tcg_target_long)qemu_st_helpers[lb->opc & 3]); + (tcg_target_long)qemu_st_helpers[s_bits]); tcg_out_callr(s, TCG_REG_TMP); tcg_out_nop(s); @@ -980,10 +1000,10 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, /* Add any "high bits" from the tlb offset to the env address into X2, to take advantage of the LSL12 form of the addi instruction. X2 = env + (tlb_offset & 0xfff000) */ - tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000); + tcg_out_addi(s, E64, TCG_REG_X2, base, tlb_offset & 0xfff000); /* Merge the tlb index contribution into X2. X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */ - tcg_out_arith(s, ARITH_ADD, 1, TCG_REG_X2, TCG_REG_X2, + tcg_out_arith(s, INSN_ADD, E64, TCG_REG_X2, TCG_REG_X2, TCG_REG_X0, -CPU_TLB_ENTRY_BITS); /* Merge "low bits" from tlb offset, load the tlb comparator into X0. X0 = load [X2 + (tlb_offset & 0x000fff)] */ @@ -997,7 +1017,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, (is_read ? offsetof(CPUTLBEntry, addr_read) : offsetof(CPUTLBEntry, addr_write))); /* Perform the address comparison. */ - tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0); + tcg_out_cmp(s, EXT(TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0); *label_ptr = s->code_ptr; /* If not equal, we jump to the slow path. 
*/ tcg_out_goto_cond_noaddr(s, TCG_COND_NE); @@ -1024,8 +1044,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r, case 1 | 4: if (TCG_LDST_BSWAP) { tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r); - tcg_out_rev16(s, 0, data_r, data_r); - tcg_out_sxt(s, 1, 1, data_r, data_r); + tcg_out_rev16(s, E32, data_r, data_r); + tcg_out_sxt(s, E64, 1, data_r, data_r); } else { tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, data_r, addr_r, off_r); } @@ -1033,14 +1053,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r, case 2: tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r); if (TCG_LDST_BSWAP) { - tcg_out_rev(s, 0, data_r, data_r); + tcg_out_rev(s, E32, data_r, data_r); } break; case 2 | 4: if (TCG_LDST_BSWAP) { tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r); - tcg_out_rev(s, 0, data_r, data_r); - tcg_out_sxt(s, 1, 2, data_r, data_r); + tcg_out_rev(s, E32, data_r, data_r); + tcg_out_sxt(s, E64, 2, data_r, data_r); } else { tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, data_r, addr_r, off_r); } @@ -1048,7 +1068,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r, case 3: tcg_out_ldst_r(s, LDST_64, LDST_LD, data_r, addr_r, off_r); if (TCG_LDST_BSWAP) { - tcg_out_rev(s, 1, data_r, data_r); + tcg_out_rev(s, E64, data_r, data_r); } break; default: @@ -1065,7 +1085,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r, break; case 1: if (TCG_LDST_BSWAP) { - tcg_out_rev16(s, 0, TCG_REG_TMP, data_r); + tcg_out_rev16(s, E32, TCG_REG_TMP, data_r); tcg_out_ldst_r(s, LDST_16, LDST_ST, TCG_REG_TMP, addr_r, off_r); } else { tcg_out_ldst_r(s, LDST_16, LDST_ST, data_r, addr_r, off_r); @@ -1073,7 +1093,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r, break; case 2: if (TCG_LDST_BSWAP) { - tcg_out_rev(s, 0, TCG_REG_TMP, data_r); + tcg_out_rev(s, E32, TCG_REG_TMP, data_r); tcg_out_ldst_r(s, LDST_32, LDST_ST, TCG_REG_TMP, addr_r, off_r); } else { tcg_out_ldst_r(s, LDST_32, LDST_ST, data_r, addr_r, off_r); @@ -1081,7 +1101,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r, break; case 3: if (TCG_LDST_BSWAP) { - tcg_out_rev(s, 1, TCG_REG_TMP, data_r); + tcg_out_rev(s, E64, TCG_REG_TMP, data_r); tcg_out_ldst_r(s, LDST_64, LDST_ST, TCG_REG_TMP, addr_r, off_r); } else { tcg_out_ldst_r(s, LDST_64, LDST_ST, data_r, addr_r, off_r); @@ -1196,7 +1216,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, { /* ext will be set in the switch below, which will fall through to the common code. It triggers the use of extended regs where appropriate. 
*/ - int ext = 0; + AArch64Ext ext = E32; switch (opc) { case INDEX_op_exit_tb: @@ -1252,9 +1272,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_mov_i64: - ext = 1; /* fall through */ + tcg_out_mov(s, TCG_TYPE_I64, args[0], args[1]); + break; case INDEX_op_mov_i32: - tcg_out_movr(s, ext, args[0], args[1]); + tcg_out_mov(s, TCG_TYPE_I32, args[0], args[1]); break; case INDEX_op_movi_i64: @@ -1265,123 +1286,122 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_add_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_add_i32: if (const_args[2]) { tcg_out_addi(s, ext, args[0], args[1], args[2]); } else { - tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0); + tcg_out_arith(s, INSN_ADD, ext, args[0], args[1], args[2], 0); } break; case INDEX_op_sub_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_sub_i32: if (const_args[2]) { tcg_out_addi(s, ext, args[0], args[1], -args[2]); } else { - tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0); + tcg_out_arith(s, INSN_SUB, ext, args[0], args[1], args[2], 0); } break; case INDEX_op_and_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_and_i32: if (const_args[2]) { - tcg_out_limm(s, ARITH_ANDI, ext, args[0], args[1], args[2]); + tcg_out_limm(s, INSN_ANDI, ext, args[0], args[1], args[2]); } else { - tcg_out_arith(s, ARITH_AND, ext, args[0], args[1], args[2], 0); + tcg_out_arith(s, INSN_AND, ext, args[0], args[1], args[2], 0); } break; case INDEX_op_or_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_or_i32: if (const_args[2]) { - tcg_out_limm(s, ARITH_ORI, ext, args[0], args[1], args[2]); + tcg_out_limm(s, INSN_ORRI, ext, args[0], args[1], args[2]); } else { - tcg_out_arith(s, ARITH_OR, ext, args[0], args[1], args[2], 0); + tcg_out_arith(s, INSN_ORR, ext, args[0], args[1], args[2], 0); } break; case INDEX_op_xor_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_xor_i32: if (const_args[2]) { - tcg_out_limm(s, ARITH_XORI, ext, args[0], args[1], args[2]); + tcg_out_limm(s, INSN_EORI, ext, args[0], args[1], args[2]); } else { - tcg_out_arith(s, ARITH_XOR, ext, args[0], args[1], args[2], 0); + tcg_out_arith(s, INSN_EOR, ext, args[0], args[1], args[2], 0); } break; case INDEX_op_mul_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_mul_i32: - tcg_out_mul(s, ext, args[0], args[1], args[2]); + tcg_out_data2(s, INSN_MUL, ext, args[0], args[1], args[2]); break; case INDEX_op_shl_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_shl_i32: if (const_args[2]) { /* LSL / UBFM Wd, Wn, (32 - m) */ - tcg_out_shl(s, ext, args[0], args[1], args[2]); + tcg_out_shli(s, ext, args[0], args[1], args[2]); } else { /* LSL / LSLV */ - tcg_out_shiftrot_reg(s, SRR_SHL, ext, args[0], args[1], args[2]); + tcg_out_data2(s, INSN_LSLV, ext, args[0], args[1], args[2]); } break; case INDEX_op_shr_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_shr_i32: if (const_args[2]) { /* LSR / UBFM Wd, Wn, m, 31 */ - tcg_out_shr(s, ext, args[0], args[1], args[2]); + tcg_out_shri(s, ext, args[0], args[1], args[2]); } else { /* LSR / LSRV */ - tcg_out_shiftrot_reg(s, SRR_SHR, ext, args[0], args[1], args[2]); + tcg_out_data2(s, INSN_LSRV, ext, args[0], args[1], args[2]); } break; case INDEX_op_sar_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case 
INDEX_op_sar_i32: if (const_args[2]) { /* ASR / SBFM Wd, Wn, m, 31 */ - tcg_out_sar(s, ext, args[0], args[1], args[2]); + tcg_out_sari(s, ext, args[0], args[1], args[2]); } else { /* ASR / ASRV */ - tcg_out_shiftrot_reg(s, SRR_SAR, ext, args[0], args[1], args[2]); + tcg_out_data2(s, INSN_ASRV, ext, args[0], args[1], args[2]); } break; case INDEX_op_rotr_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_rotr_i32: if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, m */ - tcg_out_rotr(s, ext, args[0], args[1], args[2]); + tcg_out_rotri(s, ext, args[0], args[1], args[2]); } else { /* ROR / RORV */ - tcg_out_shiftrot_reg(s, SRR_ROR, ext, args[0], args[1], args[2]); + tcg_out_data2(s, INSN_RORV, ext, args[0], args[1], args[2]); } break; case INDEX_op_rotl_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_rotl_i32: /* same as rotate right by (32 - m) */ if (const_args[2]) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */ - tcg_out_rotl(s, ext, args[0], args[1], args[2]); + tcg_out_rotli(s, ext, args[0], args[1], args[2]); } else { - tcg_out_arith(s, ARITH_SUB, 0, + tcg_out_arith(s, INSN_SUB, E32, TCG_REG_TMP, TCG_REG_XZR, args[2], 0); - tcg_out_shiftrot_reg(s, SRR_ROR, ext, - args[0], args[1], TCG_REG_TMP); + tcg_out_data2(s, INSN_RORV, ext, args[0], args[1], TCG_REG_TMP); } break; case INDEX_op_brcond_i64: - ext = 1; /* fall through */ - case INDEX_op_brcond_i32: /* CMP 0, 1, cond(2), label 3 */ + ext = E64; /* fall through */ + case INDEX_op_brcond_i32: tcg_out_cmp(s, ext, args[0], args[1], const_args[1]); tcg_out_goto_label_cond(s, args[2], args[3]); break; case INDEX_op_setcond_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_setcond_i32: tcg_out_cmp(s, ext, args[1], args[2], const_args[2]); tcg_out_cset(s, 0, args[0], args[3]); @@ -1425,7 +1445,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_bswap64_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_bswap32_i64: case INDEX_op_bswap32_i32: tcg_out_rev(s, ext, args[0], args[1]); @@ -1436,17 +1456,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_ext8s_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_ext8s_i32: tcg_out_sxt(s, ext, 0, args[0], args[1]); break; case INDEX_op_ext16s_i64: - ext = 1; /* fall through */ + ext = E64; /* fall through */ case INDEX_op_ext16s_i32: tcg_out_sxt(s, ext, 1, args[0], args[1]); break; case INDEX_op_ext32s_i64: - tcg_out_sxt(s, 1, 2, args[0], args[1]); + tcg_out_sxt(s, E64, 2, args[0], args[1]); break; case INDEX_op_ext8u_i64: case INDEX_op_ext8u_i32: @@ -1457,7 +1477,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_uxt(s, 1, args[0], args[1]); break; case INDEX_op_ext32u_i64: - tcg_out_movr(s, 0, args[0], args[1]); + tcg_out_movr(s, E32, args[0], args[1]); break; default: @@ -1616,7 +1636,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved); /* FP -> callee_saved */ - tcg_out_movr_sp(s, 1, TCG_REG_FP, TCG_REG_SP); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_FP, TCG_REG_SP); /* store callee-preserved regs x19..x28 using FP -> callee_saved */ for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) { @@ -1625,7 +1645,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) } /* make stack space for TCG locals */ - tcg_out_addi(s, 1, TCG_REG_SP, TCG_REG_SP, + tcg_out_addi(s, E64, TCG_REG_SP, TCG_REG_SP, -frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN); /* 
inform TCG about how to find TCG locals with register, offset, size */ tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, @@ -1644,7 +1664,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) tb_ret_addr = s->code_ptr; /* remove TCG locals stack space */ - tcg_out_addi(s, 1, TCG_REG_SP, TCG_REG_SP, + tcg_out_addi(s, E64, TCG_REG_SP, TCG_REG_SP, frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN); /* restore registers x19..x28. -- 1.8.3.1
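
For reference, the MOVZ/MOVK strategy that the reworked tcg_out_movi keeps can be sketched as a small stand-alone program (ctz64_demo, movi_demo and the *_DEMO constants are illustrative stand-ins rather than the QEMU helpers); it emits a MOVZ for the lowest non-zero half-word and MOVKs for the remaining ones, skipping 16-bit zero holes, exactly as the comment in the patch describes.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for QEMU's ctz64(): count trailing zeros,
   mapping an input of 0 to 64. */
static unsigned ctz64_demo(uint64_t v)
{
    return v ? (unsigned)__builtin_ctzll(v) : 64;
}

static const uint32_t MOVZ_DEMO = 0x52800000u;
static const uint32_t MOVK_DEMO = 0x72800000u;
static const uint32_t SF_DEMO   = 0x80000000u;   /* 64-bit form */

/* Print the MOVZ/MOVK sequence for loading 'value' into register rd,
   16 bits at a time, skipping all-zero half-words. */
static void movi_demo(unsigned rd, uint64_t value)
{
    uint32_t insn = MOVZ_DEMO;
    do {
        unsigned shift = ctz64_demo(value) & (63 & -16);  /* 0, 16, 32 or 48 */
        unsigned half = (value >> shift) & 0xffff;
        uint32_t sf = shift >= 32 ? SF_DEMO : 0;

        /* hw = shift/16 lands in bits [22:21], i.e. shift << 17. */
        printf("0x%08x  %s #0x%04x, lsl #%u\n",
               insn | sf | shift << 17 | half << 5 | rd,
               insn == MOVZ_DEMO ? "movz" : "movk", half, shift);

        insn = MOVK_DEMO;                 /* later pieces become MOVKs */
        value &= ~(0xffffull << shift);   /* clear the emitted half-word */
    } while (value);
}

int main(void)
{
    movi_demo(0, 0xdead0000beef0000ull);  /* two pieces, two zero holes */
    return 0;
}

For 0xdead0000beef0000 this prints two instructions, a 32-bit MOVZ of 0xbeef shifted by 16 followed by a 64-bit MOVK of 0xdead shifted by 48, matching the per-instruction EXT(shift >= 32) choice in the patch.
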