Hi Kohei,
On 7/4/25 16:45, Kohei Tokunaga wrote:
A TB consists of a wasmTBHeader followed by the data listed below. The
wasmTBHeader contains pointers for each element:
- TCI code
- Wasm code
- Array of function indices imported into the Wasm instance
- Counter tracking the number of TB executions
- Pointer to the Wasm instance information
The Wasm backend (tcg/wasm32.c) and Wasm instances running on the same
thread share information, such as CPUArchState, through a wasmContext
structure. The Wasm backend defines tcg_qemu_tb_exec as a common entry point
for TBs, similar to the TCI backend. tcg_qemu_tb_exec runs TBs on a forked
TCI interpreter by default, while it compiles and executes frequently
executed TBs as Wasm.
The code generator (tcg/wasm32) receives TCG IR and generates both Wasm and
TCI instructions. Since Wasm cannot directly jump to specific addresses,
labels are implemented using Wasm control flow instructions. As shown in the
pseudo-code below, a TB wraps instructions in a large loop, where code is
placed within if blocks separated by labels. Branching is handled by
breaking from the current block and entering the target block.
loop
if
... code after label1
end
if
... code after label2
end
...
end
Additionally, the Wasm backend differs from other backends in several ways:
- goto_tb and goto_ptr return control to tcg_qemu_tb_exec which runs the
target TB
- Helper function pointers are stored in an array in TB and imported into
the Wasm instance on execution
- Wasm TBs lack prologue and epilogue. TBs are executed via tcg_qemu_tb_exec
Browsers raise an out-of-memory error if too many Wasm instances are
created. To prevent this, the Wasm backend tracks active instances using an
array. When instantiating a new instance risks exceeding the limit, the
backend removes older instances to avoid browser errors. These removed
instances are re-instantiated when needed.
Signed-off-by: Kohei Tokunaga <ktokunaga.m...@gmail.com>
---
include/accel/tcg/getpc.h | 2 +-
include/tcg/helper-info.h | 4 +-
include/tcg/tcg.h | 2 +-
meson.build | 2 +
tcg/meson.build | 5 +
tcg/tcg.c | 26 +-
tcg/wasm32.c | 1260 +++++++++
tcg/wasm32.h | 39 +
tcg/wasm32/tcg-target-con-set.h | 18 +
tcg/wasm32/tcg-target-con-str.h | 8 +
tcg/wasm32/tcg-target-has.h | 102 +
tcg/wasm32/tcg-target-mo.h | 12 +
tcg/wasm32/tcg-target-opc.h.inc | 4 +
tcg/wasm32/tcg-target-reg-bits.h | 12 +
tcg/wasm32/tcg-target.c.inc | 4484 ++++++++++++++++++++++++++++++
tcg/wasm32/tcg-target.h | 65 +
16 files changed, 6035 insertions(+), 10 deletions(-)
create mode 100644 tcg/wasm32.c
create mode 100644 tcg/wasm32.h
create mode 100644 tcg/wasm32/tcg-target-con-set.h
create mode 100644 tcg/wasm32/tcg-target-con-str.h
create mode 100644 tcg/wasm32/tcg-target-has.h
create mode 100644 tcg/wasm32/tcg-target-mo.h
create mode 100644 tcg/wasm32/tcg-target-opc.h.inc
create mode 100644 tcg/wasm32/tcg-target-reg-bits.h
create mode 100644 tcg/wasm32/tcg-target.c.inc
create mode 100644 tcg/wasm32/tcg-target.h
diff --git a/tcg/tcg.c b/tcg/tcg.c
index dfd48b8264..154a4dafa7 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -136,6 +136,10 @@ static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
const TCGArg args[TCG_MAX_OP_ARGS],
const int const_args[TCG_MAX_OP_ARGS]);
+#if defined(EMSCRIPTEN)
Maybe we can leave these declarations independent of EMSCRIPTEN, to reduce #ifdef'ry.
+static void tcg_out_label_cb(TCGContext *s, TCGLabel *l);
+static int tcg_out_tb_end(TCGContext *s);
+#endif
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg src);
@@ -251,7 +255,7 @@ TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;
-#ifndef CONFIG_TCG_INTERPRETER
+#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN)
s/&&/||/ otherwise breaks TCI? (various cases)
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif
@@ -358,6 +362,9 @@ static void tcg_out_label(TCGContext *s, TCGLabel *l)
tcg_debug_assert(!l->has_value);
l->has_value = 1;
l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
+#if defined(EMSCRIPTEN)
+ tcg_out_label_cb(s, l);
+#endif
}
TCGLabel *gen_new_label(void)
@@ -1139,7 +1146,7 @@ static TCGHelperInfo info_helper_st128_mmu = {
| dh_typemask(ptr, 5) /* uintptr_t ra */
};
-#ifdef CONFIG_TCG_INTERPRETER
+#if defined(CONFIG_TCG_INTERPRETER) || defined(EMSCRIPTEN)
static ffi_type *typecode_to_ffi(int argmask)
{
/*
@@ -1593,7 +1600,7 @@ void tcg_prologue_init(void)
s->code_buf = s->code_gen_ptr;
s->data_gen_ptr = NULL;
-#ifndef CONFIG_TCG_INTERPRETER
+#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN)
tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif
@@ -1649,11 +1656,11 @@ void tcg_prologue_init(void)
}
}
-#ifndef CONFIG_TCG_INTERPRETER
+#if !defined(CONFIG_TCG_INTERPRETER) && !defined(EMSCRIPTEN)
/*
* Assert that goto_ptr is implemented completely, setting an epilogue.
- * For tci, we use NULL as the signal to return from the interpreter,
- * so skip this check.
+ * For tci and wasm backend, we use NULL as the signal to return from the
+ * interpreter, so skip this check.
*/
tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif
@@ -6505,6 +6512,13 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb,
uint64_t pc_start)
tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif
+#if defined(EMSCRIPTEN)
+ i = tcg_out_tb_end(s);
+ if (i < 0) {
+ return i;
+ }
+#endif
+
return tcg_current_code_size(s);
}
Out of curiosity, have you tried to run a big-endian guest?