Emscripten's Fiber coroutine implements coroutine switching using Asyncify's stack unwinding and rewinding features [1]. When a coroutine yields (i.e. switches out), Asyncify unwinds the stack, returning control to Emscripten's JS code (Fiber.trampoline()). Then execution resumes in the target coroutine by rewinding the stack. Stack unwinding is implemented by a sequence of immediate function returns, while rewinding re-enters the functions in the call stack, skipping any code between the function's entry point and the original call position [2].
This commit updates the TB's Wasm module to allow helper functions to trigger coroutine switching. In particular, the TB handles the unwinding and rewinding flows as follows: - The TB checks the Asyncify.state JS object after each helper call. If unwinding is in progress, the TB immediately returns to the caller so that the unwinding can continue. - Each function call is preceded by a block boundary and an update of the BLOCK_IDX variable. This enables rewinding to skip any code between the function's entry point and the original call position. Additionally, this commit introduces WasmContext.do_init, a flag indicating whether the TB should reset the BLOCK_IDX variable to 0 (i.e. start from the beginning). call_wasm_tb is a newly introduced wrapper function for the Wasm module's entrypoint, which sets "do_init = 1" to ensure normal TB execution begins at the first block. During rewinding, the C code does not set do_init to 1, allowing the TB to preserve the BLOCK_IDX value from the previous unwinding and correctly resume execution from the last unwound block. [1] https://emscripten.org/docs/api_reference/fiber.h.html [2] https://kripken.github.io/blog/wasm/2019/07/16/asyncify.html#new-asyncify Signed-off-by: Kohei Tokunaga <ktokunaga.m...@gmail.com> --- tcg/wasm.c | 3 ++ tcg/wasm.h | 11 ++++++++ tcg/wasm/tcg-target.c.inc | 58 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 71 insertions(+), 1 deletion(-) diff --git a/tcg/wasm.c b/tcg/wasm.c index 15db1f9a8a..82987e9dff 100644 --- a/tcg/wasm.c +++ b/tcg/wasm.c @@ -44,6 +44,9 @@ EM_JS_PRE(void*, instantiate_wasm, (void *wasm_begin, const wasm = HEAP8.subarray(DEC_PTR(wasm_begin), DEC_PTR(wasm_begin) + wasm_size); var helper = {}; + helper.u = () => { + return (Asyncify.state != Asyncify.State.Unwinding) ? 
1 : 0; + }; const entsize = TCG_TARGET_REG_BITS / 8; for (var i = 0; i < import_vec_size / entsize; i++) { const idx = memory_v.getBigInt64( diff --git a/tcg/wasm.h b/tcg/wasm.h index b5d9ce75da..fdde908557 100644 --- a/tcg/wasm.h +++ b/tcg/wasm.h @@ -30,11 +30,22 @@ struct WasmContext { * Pointer to a stack array. */ uint64_t *stack; + + /* + * Flag indicating whether to initialize the block index(1) or not(0). + */ + uint32_t do_init; }; /* Instantiated Wasm function of a TB */ typedef uintptr_t (*wasm_tb_func)(struct WasmContext *); +static inline uintptr_t call_wasm_tb(wasm_tb_func f, struct WasmContext *ctx) +{ + ctx->do_init = 1; /* reset the block index (rewinding will skip this) */ + return f(ctx); +} + /* * A TB of the Wasm backend starts from a header which contains pointers for * each data stored in the following region in the TB. diff --git a/tcg/wasm/tcg-target.c.inc b/tcg/wasm/tcg-target.c.inc index 7663f03eaf..6af4d6eb07 100644 --- a/tcg/wasm/tcg-target.c.inc +++ b/tcg/wasm/tcg-target.c.inc @@ -152,7 +152,8 @@ static const uint8_t tcg_target_reg_index[TCG_TARGET_NB_REGS] = { #define CTX_IDX 0 /* Function index */ -#define HELPER_IDX_START 0 /* The first index of helper functions */ +#define CHECK_UNWINDING_IDX 0 /* A function to check the Asyncify status */ +#define HELPER_IDX_START 1 /* The first index of helper functions */ #define PTR_TYPE 0x7e @@ -169,6 +170,7 @@ typedef enum { OPC_GLOBAL_GET = 0x23, OPC_GLOBAL_SET = 0x24, + OPC_I32_LOAD = 0x28, OPC_I64_LOAD = 0x29, OPC_I64_LOAD8_S = 0x30, OPC_I64_LOAD8_U = 0x31, @@ -176,6 +178,7 @@ typedef enum { OPC_I64_LOAD16_U = 0x33, OPC_I64_LOAD32_S = 0x34, OPC_I64_LOAD32_U = 0x35, + OPC_I32_STORE = 0x36, OPC_I64_STORE = 0x37, OPC_I64_STORE8 = 0x3c, OPC_I64_STORE16 = 0x3d, @@ -1116,6 +1119,17 @@ static int64_t get_helper_idx(TCGContext *s, intptr_t helper_idx_on_qemu) return -1; } +static void tcg_wasm_out_handle_unwinding(TCGContext *s) +{ + tcg_wasm_out_op_idx(s, OPC_CALL, CHECK_UNWINDING_IDX); + 
tcg_wasm_out_op(s, OPC_I32_EQZ); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); + tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); + /* returns if unwinding */ + tcg_wasm_out_op(s, OPC_RETURN); + tcg_wasm_out_op(s, OPC_END); +} + static void tcg_wasm_out_call(TCGContext *s, intptr_t func, const TCGHelperInfo *info) { @@ -1132,7 +1146,16 @@ static void tcg_wasm_out_call(TCGContext *s, intptr_t func, tcg_wasm_out_op_const(s, OPC_I64_CONST, (uint64_t)s->code_ptr); tcg_wasm_out_op_ldst(s, OPC_I64_STORE, 0, ofs); + /* + * update the block index so that the possible rewinding will + * skip this block + */ + tcg_wasm_out_op_const(s, OPC_I64_CONST, cur_block_idx + 1); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + tcg_wasm_out_new_block(s); + gen_call(s, info, func_idx); + tcg_wasm_out_handle_unwinding(s); } static void gen_func_type_qemu_ld(TCGContext *s, uint32_t oi) @@ -1204,6 +1227,14 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg, gen_func_type_qemu_ld(s, oi); } + /* + * update the block index so that the possible rewinding will + * skip this block + */ + tcg_wasm_out_op_const(s, OPC_I64_CONST, cur_block_idx + 1); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + tcg_wasm_out_new_block(s); + /* call the target helper */ tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg)); @@ -1212,6 +1243,7 @@ static void tcg_wasm_out_qemu_ld(TCGContext *s, TCGReg data_reg, tcg_wasm_out_op_idx(s, OPC_CALL, func_idx); tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(data_reg)); + tcg_wasm_out_handle_unwinding(s); } static void *qemu_st_helper_ptr(uint32_t oi) @@ -1245,6 +1277,14 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg, gen_func_type_qemu_st(s, oi); } + /* + * update the block index so that the possible rewinding will + * skip this block + */ + tcg_wasm_out_op_const(s, OPC_I64_CONST, cur_block_idx + 1); + tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + 
tcg_wasm_out_new_block(s); + /* call the target helper */ tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(TCG_AREG0)); tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, REG_IDX(addr_reg)); @@ -1261,6 +1301,7 @@ static void tcg_wasm_out_qemu_st(TCGContext *s, TCGReg data_reg, tcg_wasm_out_op_const(s, OPC_I64_CONST, (intptr_t)s->code_ptr); tcg_wasm_out_op_idx(s, OPC_CALL, func_idx); + tcg_wasm_out_handle_unwinding(s); } static void tcg_out_op_l(TCGContext *s, TCGOpcode op, TCGLabel *l0) @@ -2264,6 +2305,9 @@ static const uint8_t mod_1[] = { 0x60, /* 0: Type of "start" function */ 0x01, PTR_TYPE, /* arg: ctx pointer */ 0x01, PTR_TYPE, /* return: res */ + 0x60, /* 1: Type of the asyncify helper */ + 0x0, /* no argument */ + 0x01, 0x7f, /* return: res (i32) */ }; #define MOD_1_PH_TYPE_SECTION_SIZE_OFF 9 @@ -2289,6 +2333,9 @@ static const uint8_t mod_2[] = { 0x02, 0x07, /* shared mem(64bit) */ 0x00, 0x80, 0x80, 0x10, /* min: 0, max: 262144 pages */ #endif + 0x06, 0x68, 0x65, 0x6c, 0x70, 0x65, 0x72, /* module: "helper" */ + 0x01, 0x75, /* name: "u" */ + 0x00, 0x01, /* func type 1 */ }; #define MOD_2_PH_IMPORT_SECTION_SIZE_OFF 1 @@ -2427,8 +2474,17 @@ static void tcg_out_tb_start(TCGContext *s) tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, REG_IDX(TCG_REG_CALL_STACK)); tcg_wasm_out_op(s, OPC_END); + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(do_init)); + tcg_wasm_out_op_ldst(s, OPC_I32_LOAD, 0, ofs); + tcg_wasm_out_op_const(s, OPC_I32_CONST, 0); + tcg_wasm_out_op(s, OPC_I32_NE); + tcg_wasm_out_op_block(s, OPC_IF, BLOCK_NORET); tcg_wasm_out_op_const(s, OPC_I64_CONST, 0); tcg_wasm_out_op_idx(s, OPC_GLOBAL_SET, BLOCK_IDX); + ofs = tcg_wasm_out_get_ctx(s, CTX_OFFSET(do_init)); + tcg_wasm_out_op_const(s, OPC_I32_CONST, 0); + tcg_wasm_out_op_ldst(s, OPC_I32_STORE, 0, ofs); + tcg_wasm_out_op(s, OPC_END); tcg_wasm_out_op_block(s, OPC_LOOP, BLOCK_NORET); tcg_wasm_out_op_idx(s, OPC_GLOBAL_GET, BLOCK_IDX); -- 2.43.0