The rot/clz/ctz operations are implemented using the corresponding Wasm instructions. TCI instructions are also generated, in the same way as in the original TCI backend.
Signed-off-by: Kohei Tokunaga <ktokunaga.m...@gmail.com> --- tcg/wasm.c | 34 +++++++++++ tcg/wasm/tcg-target-opc.h.inc | 4 ++ tcg/wasm/tcg-target.c.inc | 107 ++++++++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+) V2: - This commit generates both Wasm and TCI instructions. diff --git a/tcg/wasm.c b/tcg/wasm.c index a5e72d8fe5..2688ded58a 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -363,6 +363,40 @@ static uintptr_t tcg_qemu_tb_exec_tci(CPUArchState *env, const void *v_tb_ptr) tci_args_rr(insn, &r0, &r1); regs[r0] = ctpop64(regs[r1]); break; + case INDEX_op_clz: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] ? clz64(regs[r1]) : regs[r2]; + break; + case INDEX_op_ctz: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2]; + break; + case INDEX_op_tci_clz32: + tci_args_rrr(insn, &r0, &r1, &r2); + tmp32 = regs[r1]; + regs[r0] = tmp32 ? clz32(tmp32) : regs[r2]; + break; + case INDEX_op_tci_ctz32: + tci_args_rrr(insn, &r0, &r1, &r2); + tmp32 = regs[r1]; + regs[r0] = tmp32 ? 
ctz32(tmp32) : regs[r2]; + break; + case INDEX_op_rotl: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = rol64(regs[r1], regs[r2] & 63); + break; + case INDEX_op_rotr: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = ror64(regs[r1], regs[r2] & 63); + break; + case INDEX_op_tci_rotl32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = rol32(regs[r1], regs[r2] & 31); + break; + case INDEX_op_tci_rotr32: + tci_args_rrr(insn, &r0, &r1, &r2); + regs[r0] = ror32(regs[r1], regs[r2] & 31); + break; default: g_assert_not_reached(); } diff --git a/tcg/wasm/tcg-target-opc.h.inc b/tcg/wasm/tcg-target-opc.h.inc index 5ed8c67535..092a5086ec 100644 --- a/tcg/wasm/tcg-target-opc.h.inc +++ b/tcg/wasm/tcg-target-opc.h.inc @@ -12,3 +12,7 @@ DEF(tci_divs32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_divu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_rems32, 1, 2, 0, TCG_OPF_NOT_PRESENT) DEF(tci_remu32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_clz32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_ctz32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rotl32, 1, 2, 0, TCG_OPF_NOT_PRESENT) +DEF(tci_rotr32, 1, 2, 0, TCG_OPF_NOT_PRESENT) diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 27f3a7414b..d547e7bf09 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -158,6 +158,7 @@ typedef enum { OPC_I32_CONST = 0x41, OPC_I64_CONST = 0x42, + OPC_I32_EQZ = 0x45, OPC_I32_EQ = 0x46, OPC_I32_NE = 0x47, OPC_I32_LT_S = 0x48, @@ -169,6 +170,7 @@ typedef enum { OPC_I32_GE_S = 0x4e, OPC_I32_GE_U = 0x4f, + OPC_I64_EQZ = 0x50, OPC_I64_EQ = 0x51, OPC_I64_NE = 0x52, OPC_I64_LT_S = 0x53, @@ -180,6 +182,8 @@ typedef enum { OPC_I64_GE_S = 0x59, OPC_I64_GE_U = 0x5a, + OPC_I32_CLZ = 0x67, + OPC_I32_CTZ = 0x68, OPC_I32_ADD = 0x6a, OPC_I32_DIV_S = 0x6d, OPC_I32_DIV_U = 0x6e, @@ -187,7 +191,11 @@ typedef enum { OPC_I32_REM_U = 0x70, OPC_I32_SHR_S = 0x75, OPC_I32_SHR_U = 0x76, + OPC_I32_ROTL = 0x77, + OPC_I32_ROTR = 0x78, + OPC_I64_CLZ = 0x79, + OPC_I64_CTZ = 0x7a, OPC_I64_POPCNT = 0x7b, 
OPC_I64_ADD = 0x7c, OPC_I64_SUB = 0x7d, @@ -202,6 +210,8 @@ typedef enum { OPC_I64_SHL = 0x86, OPC_I64_SHR_S = 0x87, OPC_I64_SHR_U = 0x88, + OPC_I64_ROTL = 0x89, + OPC_I64_ROTR = 0x8a, OPC_I32_WRAP_I64 = 0xa7, OPC_I64_EXTEND_I32_S = 0xac, @@ -212,6 +222,7 @@ typedef enum { typedef enum { BLOCK_I64 = 0x7e, + BLOCK_I32 = 0x7f, } WasmBlockType; #define BUF_SIZE 1024 @@ -563,6 +574,42 @@ static void tcg_wasm_out_ctpop64(TCGContext *s, TCGReg ret, TCGReg arg) tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); } +static void tcg_wasm_out_cz( + TCGContext *s, TCGType type, WasmInsn opc32, WasmInsn opc64, + TCGReg ret, TCGReg arg1, TCGReg arg2) +{ + switch (type) { + case TCG_TYPE_I32: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, OPC_I32_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_I32); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, OPC_ELSE); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, OPC_I32_WRAP_I64); + tcg_wasm_out_op(s, opc32); + tcg_wasm_out_op(s, OPC_END); + tcg_wasm_out_op(s, OPC_I64_EXTEND_I32_U); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); + break; + case TCG_TYPE_I64: + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, OPC_I64_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_I64); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg2)); + tcg_wasm_out_op(s, OPC_ELSE); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(arg1)); + tcg_wasm_out_op(s, opc64); + tcg_wasm_out_op(s, OPC_END); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(ret)); + break; + default: + g_assert_not_reached(); + } +} + static void tcg_out_op_ri(TCGContext *s, TCGOpcode op, TCGReg r0, int32_t i1) { tcg_insn_unit_tci insn = 0; @@ -1182,6 +1229,66 @@ static const TCGOutOpUnary outop_ctpop = { .out_rr = tgen_ctpop, }; +static void tgen_rotl(TCGContext *s, TCGType type, + 
TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_rotl32 + : INDEX_op_rotl); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_ROTL, OPC_I64_ROTL, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rotl = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rotl, +}; + +static void tgen_rotr(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_rotr32 + : INDEX_op_rotr); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_o1_i2_type(s, type, OPC_I32_ROTR, OPC_I64_ROTR, a0, a1, a2); +} + +static const TCGOutOpBinary outop_rotr = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_rotr, +}; + +static void tgen_clz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_clz32 + : INDEX_op_clz); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_cz(s, type, OPC_I32_CLZ, OPC_I64_CLZ, a0, a1, a2); +} + +static const TCGOutOpBinary outop_clz = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_clz, +}; + +static void tgen_ctz(TCGContext *s, TCGType type, + TCGReg a0, TCGReg a1, TCGReg a2) +{ + TCGOpcode opc = (type == TCG_TYPE_I32 + ? INDEX_op_tci_ctz32 + : INDEX_op_ctz); + tcg_out_op_rrr(s, opc, a0, a1, a2); + tcg_wasm_out_cz(s, type, OPC_I32_CTZ, OPC_I64_CTZ, a0, a1, a2); +} + +static const TCGOutOpBinary outop_ctz = { + .base.static_constraint = C_O1_I2(r, r, r), + .out_rrr = tgen_ctz, +}; + static void tcg_out_tb_start(TCGContext *s) { init_sub_buf(); -- 2.43.0